// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2018
 * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
 *	   Paul Mackerras <paulus@ozlabs.org>
 *
 * Description: KVM functions specific to running nested KVM-HV guests
 * on Book3S processors (specifically POWER9 and later).
 */

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/llist.h>
#include <linux/pgtable.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>
#include <asm/plpar_wrappers.h>

static struct patb_entry *pseries_partition_tb;

static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);

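/*
 * Save the vcpu's hypervisor-privileged register state (vcore state and
 * shadow registers) into the given hv_guest_state buffer.
 */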
void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->pcr = vc->pcr | PCR_MASK;
	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->tb_offset = vc->tb_offset;
	hr->dawr0 = vcpu->arch.dawr0;
	hr->dawrx0 = vcpu->arch.dawrx0;
	hr->ciabr = vcpu->arch.ciabr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
	hr->dawr1 = vcpu->arch.dawr1;
	hr->dawrx1 = vcpu->arch.dawrx1;
}

/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
{
	unsigned long *addr = (unsigned long *) regs;

	for (; addr < ((unsigned long *) (regs + 1)); addr++)
		*addr = swab64(*addr);
}

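/*
 * Byte-swap every field of an hv_guest_state, for when the L1 guest
 * runs with the opposite endianness to the host.
 */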
static void byteswap_hv_regs(struct hv_guest_state *hr)
{
	hr->version = swab64(hr->version);
	hr->lpid = swab32(hr->lpid);
	hr->vcpu_token = swab32(hr->vcpu_token);
	hr->lpcr = swab64(hr->lpcr);
	hr->pcr = swab64(hr->pcr) | PCR_MASK;
	hr->amor = swab64(hr->amor);
	hr->dpdes = swab64(hr->dpdes);
	hr->hfscr = swab64(hr->hfscr);
	hr->tb_offset = swab64(hr->tb_offset);
	hr->dawr0 = swab64(hr->dawr0);
	hr->dawrx0 = swab64(hr->dawrx0);
	hr->ciabr = swab64(hr->ciabr);
	hr->hdec_expiry = swab64(hr->hdec_expiry);
	hr->purr = swab64(hr->purr);
	hr->spurr = swab64(hr->spurr);
	hr->ic = swab64(hr->ic);
	hr->vtb = swab64(hr->vtb);
	hr->hdar = swab64(hr->hdar);
	hr->hdsisr = swab64(hr->hdsisr);
	hr->heir = swab64(hr->heir);
	hr->asdr = swab64(hr->asdr);
	hr->srr0 = swab64(hr->srr0);
	hr->srr1 = swab64(hr->srr1);
	hr->sprg[0] = swab64(hr->sprg[0]);
	hr->sprg[1] = swab64(hr->sprg[1]);
	hr->sprg[2] = swab64(hr->sprg[2]);
	hr->sprg[3] = swab64(hr->sprg[3]);
	hr->pidr = swab64(hr->pidr);
	hr->cfar = swab64(hr->cfar);
	hr->ppr = swab64(hr->ppr);
	hr->dawr1 = swab64(hr->dawr1);
	hr->dawrx1 = swab64(hr->dawrx1);
}

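/*
 * Copy the L2 exit state back into the hv_guest_state buffer returned
 * to L1, including the fault registers for the interrupt that caused
 * the exit.
 */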
static void save_hv_return_state(struct kvm_vcpu *vcpu,
				 struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->dpdes = vc->dpdes;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
	switch (vcpu->arch.trap) {
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		hr->hdar = vcpu->arch.fault_dar;
		hr->hdsisr = vcpu->arch.fault_dsisr;
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
		hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
			     (HFSCR_INTR_CAUSE & vcpu->arch.hfscr));
		break;
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		hr->heir = vcpu->arch.emul_inst;
		break;
	}
}

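/*
 * Load hypervisor-privileged register state from an hv_guest_state
 * buffer into the vcpu and its vcore.
 */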
static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->pcr = hr->pcr | PCR_MASK;
	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.dawr0 = hr->dawr0;
	vcpu->arch.dawrx0 = hr->dawrx0;
	vcpu->arch.ciabr = hr->ciabr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
	vcpu->arch.dawr1 = hr->dawr1;
	vcpu->arch.dawrx1 = hr->dawrx1;
}

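/*
 * Restore the exit-related register state from an hv_guest_state
 * buffer on return from a nested guest.
 */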
void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
				   struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.fault_dar = hr->hdar;
	vcpu->arch.fault_dsisr = hr->hdsisr;
	vcpu->arch.fault_gpa = hr->asdr;
	vcpu->arch.emul_inst = hr->heir;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}

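/*
 * An L2 MMIO load must be completed in user space; record where in L1
 * memory the loaded value has to be written back.
 */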
static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
{
	/* No need to reflect the page fault to L1, we've handled it */
	vcpu->arch.trap = 0;

	/*
	 * Since the L2 gprs have already been written back into L1 memory when
	 * we complete the mmio, store the L1 memory location of the L2 gpr
	 * being loaded into by the mmio so that the loaded value can be
	 * written there in kvmppc_complete_mmio_load()
	 */
	if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR)
	    && (vcpu->mmio_is_write == 0)) {
		vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
					   offsetof(struct pt_regs,
						    gpr[vcpu->arch.io_gpr]);
		vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
	}
}

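/*
 * Read the L2 state (hv_guest_state and pt_regs) from L1 memory, using
 * the advertised structure version to size the copy.
 */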
static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
					   struct hv_guest_state *l2_hv,
					   struct pt_regs *l2_regs,
					   u64 hv_ptr, u64 regs_ptr)
{
	int size;

	if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version,
				sizeof(l2_hv->version)))
		return -1;

	if (kvmppc_need_byteswap(vcpu))
		l2_hv->version = swab64(l2_hv->version);

	size = hv_guest_state_size(l2_hv->version);
	if (size < 0)
		return -1;

	return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
		kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
				    sizeof(struct pt_regs));
}

static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
					    struct hv_guest_state *l2_hv,
					    struct pt_regs *l2_regs,
					    u64 hv_ptr, u64 regs_ptr)
{
	int size;

	size = hv_guest_state_size(l2_hv->version);
	if (size < 0)
		return -1;

	return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
		kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
				     sizeof(struct pt_regs));
}

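/*
 * Load the L2 hypervisor register state supplied by L1, sanitising the
 * fields that L1 is not allowed to set freely.
 */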
static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
			    const struct hv_guest_state *l2_hv,
			    const struct hv_guest_state *l1_hv, u64 *lpcr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 mask;

	restore_hv_regs(vcpu, l2_hv);

	/*
	 * Don't let L1 change LPCR bits for the L2 except these:
	 */
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
		LPCR_LPES | LPCR_MER;

	/*
	 * Additional filtering is required depending on hardware
	 * and configuration.
	 */
	*lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
				      (vc->lpcr & ~mask) | (*lpcr & mask));

	/*
	 * Don't let L1 enable features for L2 which we don't allow for L1,
	 * but preserve the interrupt cause field.
	 */
	vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted);

	/* Don't let data address watchpoint match in hypervisor state */
	vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
	vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;

	/* Don't let completed instruction address breakpt match in HV state */
	if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
		vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
}

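/*
 * Handle the H_ENTER_NESTED hcall.
 * r4 = L1 guest real address of the hv_guest_state for the L2
 * r5 = L1 guest real address of the pt_regs for the L2
 * Returns the trap number that caused the exit from the L2, or a
 * hypercall error code.
 */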
long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
	long int err, r;
	struct kvm_nested_guest *l2;
	struct pt_regs l2_regs, saved_l1_regs;
	struct hv_guest_state l2_hv = {0}, saved_l1_hv;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 hv_ptr, regs_ptr;
	u64 hdec_exp, lpcr;
	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;

	if (vcpu->kvm->arch.l1_ptcr == 0)
		return H_NOT_AVAILABLE;

	if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
		return H_BAD_MODE;

	/* copy parameters in */
	hv_ptr = kvmppc_get_gpr(vcpu, 4);
	regs_ptr = kvmppc_get_gpr(vcpu, 5);
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
					      hv_ptr, regs_ptr);
	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	if (err)
		return H_PARAMETER;

	if (kvmppc_need_byteswap(vcpu))
		byteswap_hv_regs(&l2_hv);
	if (l2_hv.version > HV_GUEST_STATE_VERSION)
		return H_P2;

	if (kvmppc_need_byteswap(vcpu))
		byteswap_pt_regs(&l2_regs);
	if (l2_hv.vcpu_token >= NR_CPUS)
		return H_PARAMETER;

	/*
	 * L1 must have set up a suspended state to enter the L2 in a
	 * transactional state, and only in that case. These have to be
	 * filtered out here to prevent causing a TM Bad Thing in the
	 * host HRFID. We could synthesize a TM Bad Thing back to the L1
	 * here but there doesn't seem like much point.
	 */
	if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
		if (!MSR_TM_ACTIVE(l2_regs.msr))
			return H_BAD_MODE;
	} else {
		if (l2_regs.msr & MSR_TS_MASK)
			return H_BAD_MODE;
		if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
			return H_BAD_MODE;
	}

	/* translate lpid */
	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
	if (!l2)
		return H_PARAMETER;
	if (!l2->l1_gr_to_hr) {
		mutex_lock(&l2->tlb_lock);
		kvmhv_update_ptbl_cache(l2);
		mutex_unlock(&l2->tlb_lock);
	}

	/* save l1 values of things */
	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
	saved_l1_regs = vcpu->arch.regs;
	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);

	/* convert TB values/offsets to host (L0) values */
	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
	vc->tb_offset += l2_hv.tb_offset;

	/* set L1 state to L2 state */
	vcpu->arch.nested = l2;
	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
	vcpu->arch.nested_hfscr = l2_hv.hfscr;
	vcpu->arch.regs = l2_regs;

	/* Guest must always run with ME enabled, HV disabled. */
	vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;

	lpcr = l2_hv.lpcr;
	load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr);

	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	do {
		if (mftb() >= hdec_exp) {
			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
			r = RESUME_HOST;
			break;
		}
		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
	} while (is_kvmppc_resume_guest(r));

	/* save L2 state for return */
	l2_regs = vcpu->arch.regs;
	l2_regs.msr = vcpu->arch.shregs.msr;
	delta_purr = vcpu->arch.purr - l2_hv.purr;
	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
	delta_ic = vcpu->arch.ic - l2_hv.ic;
	delta_vtb = vc->vtb - l2_hv.vtb;
	save_hv_return_state(vcpu, &l2_hv);

	/* restore L1 state */
	vcpu->arch.nested = NULL;
	vcpu->arch.regs = saved_l1_regs;
	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
	/* set L1 MSR TS field according to L2 transaction state */
	if (l2_regs.msr & MSR_TS_MASK)
		vcpu->arch.shregs.msr |= MSR_TS_S;
	vc->tb_offset = saved_l1_hv.tb_offset;
	restore_hv_regs(vcpu, &saved_l1_hv);
	vcpu->arch.purr += delta_purr;
	vcpu->arch.spurr += delta_spurr;
	vcpu->arch.ic += delta_ic;
	vc->vtb += delta_vtb;

	kvmhv_put_nested(l2);

	/* copy l2_hv_state and regs back to guest */
	if (kvmppc_need_byteswap(vcpu)) {
		byteswap_hv_regs(&l2_hv);
		byteswap_pt_regs(&l2_regs);
	}
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
					       hv_ptr, regs_ptr);
	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	if (err)
		return H_AUTHORITY;

	if (r == -EINTR)
		return H_INTERRUPT;

	if (vcpu->mmio_needed) {
		kvmhv_nested_mmio_needed(vcpu, regs_ptr);
		return H_TOO_HARD;
	}

	return vcpu->arch.trap;
}

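/*
 * Set up for running nested guests: when we are ourselves running
 * under a pseries L0 hypervisor, allocate a partition table and
 * register it with the L0 via H_SET_PARTITION_TABLE.
 */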
long kvmhv_nested_init(void)
{
	long int ptb_order;
	unsigned long ptcr;
	long rc;

	if (!kvmhv_on_pseries())
		return 0;
	if (!radix_enabled())
		return -ENODEV;

	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
	if (ptb_order < 8)
		ptb_order = 8;
	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
				       GFP_KERNEL);
	if (!pseries_partition_tb) {
		pr_err("kvm-hv: failed to allocate nested partition table\n");
		return -ENOMEM;
	}

	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
	if (rc != H_SUCCESS) {
		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
		       rc);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
		return -ENODEV;
	}

	return 0;
}

void kvmhv_nested_exit(void)
{
	/*
	 * N.B. the kvmhv_on_pseries() test is there because it enables
	 * the compiler to remove the call to plpar_hcall_norets()
	 * when CONFIG_PPC_PSERIES=n.
	 */
	if (kvmhv_on_pseries() && pseries_partition_tb) {
		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
	}
}

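/*
 * Invalidate all translations cached for the given (shadow) lpid,
 * either directly on bare metal or via the parent (L0) hypervisor.
 */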
static void kvmhv_flush_lpid(unsigned int lpid)
{
	long rc;

	if (!kvmhv_on_pseries()) {
		radix__flush_all_lpid(lpid);
		return;
	}

	if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
		rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
					lpid, TLBIEL_INVAL_SET_LPID);
	else
		rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
					    H_RPTI_TYPE_NESTED |
					    H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					    H_RPTI_TYPE_PAT,
					    H_RPTI_PAGE_ALL, 0, -1UL);
	if (rc)
		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
}

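/*
 * Set a partition table entry, either in the hardware-registered table
 * or, when running under a pseries L0, in the table registered with L0.
 */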
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
	if (!kvmhv_on_pseries()) {
		mmu_partition_table_set_entry(lpid, dw0, dw1, true);
		return;
	}

	pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
	/* L0 will do the necessary barriers */
	kvmhv_flush_lpid(lpid);
}

static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
{
	unsigned long dw0;

	dw0 = PATB_HR | radix__get_tree_size() |
		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
}

void kvmhv_vm_nested_init(struct kvm *kvm)
{
	kvm->arch.max_nested_lpid = -1;
}

/*
 * Handle the H_SET_PARTITION_TABLE hcall.
 * r4 = guest real address of partition table + log_2(size) - 12
 * (formatted as for the PTCR).
 */
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
	int srcu_idx;
	long ret = H_SUCCESS;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	/*
	 * Limit the partition table to 4096 entries (because that's what
	 * hardware supports), and check the base address.
	 */
	if ((ptcr & PRTS_MASK) > 12 - 8 ||
	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
		ret = H_PARAMETER;
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	if (ret == H_SUCCESS)
		kvm->arch.l1_ptcr = ptcr;
	return ret;
}

/*
 * Handle the H_COPY_TOFROM_GUEST hcall.
 * r4 = L1 lpid of nested guest
 * r5 = pid
 * r6 = eaddr to access
 * r7 = to buffer (L1 gpa)
 * r8 = from buffer (L1 gpa)
 * r9 = n bytes to copy
 */
long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
{
	struct kvm_nested_guest *gp;
	int l1_lpid = kvmppc_get_gpr(vcpu, 4);
	int pid = kvmppc_get_gpr(vcpu, 5);
	gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
	gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
	gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
	void *buf;
	unsigned long n = kvmppc_get_gpr(vcpu, 9);
	bool is_load = !!gp_to;
	long rc;

	if (gp_to && gp_from) /* One must be NULL to determine the direction */
		return H_PARAMETER;

	if (eaddr & (0xFFFUL << 52))
		return H_PARAMETER;

	buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
	if (!buf)
		return H_NO_MEM;

	gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
	if (!gp) {
		rc = H_PARAMETER;
		goto out_free;
	}

	mutex_lock(&gp->tlb_lock);

	if (is_load) {
		/* Load from the nested guest into our buffer */
		rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
						     eaddr, buf, NULL, n);
		if (rc)
			goto not_found;

		/* Write what was loaded into our buffer back to the L1 guest */
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		if (rc)
			goto not_found;
	} else {
		/* Load the data to be stored from the L1 guest into our buf */
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		if (rc)
			goto not_found;

		/* Store from our buffer into the nested guest */
		rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
						     eaddr, NULL, buf, n);
		if (rc)
			goto not_found;
	}

out_unlock:
	mutex_unlock(&gp->tlb_lock);
	kvmhv_put_nested(gp);
out_free:
	kfree(buf);
	return rc;
not_found:
	rc = H_NOT_FOUND;
	goto out_unlock;
}

/*
 * Reload the partition table entry for a guest.
 * Caller must hold gp->tlb_lock.
 */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
{
	int ret;
	struct patb_entry ptbl_entry;
	unsigned long ptbl_addr;
	struct kvm *kvm = gp->l1_host;

	ret = -EFAULT;
	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
		int srcu_idx = srcu_read_lock(&kvm->srcu);
		ret = kvm_read_guest(kvm, ptbl_addr,
				     &ptbl_entry, sizeof(ptbl_entry));
		srcu_read_unlock(&kvm->srcu, srcu_idx);
	}
	if (ret) {
		gp->l1_gr_to_hr = 0;
		gp->process_table = 0;
	} else {
		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
	}
	kvmhv_set_nested_ptbl(gp);
}

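/*
 * Allocate and initialise the state for a nested guest, including its
 * shadow page table and a shadow (host-side) lpid.
 */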
static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
{
	struct kvm_nested_guest *gp;
	long shadow_lpid;

	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
	if (!gp)
		return NULL;
	gp->l1_host = kvm;
	gp->l1_lpid = lpid;
	mutex_init(&gp->tlb_lock);
	gp->shadow_pgtable = pgd_alloc(kvm->mm);
	if (!gp->shadow_pgtable)
		goto out_free;
	shadow_lpid = kvmppc_alloc_lpid();
	if (shadow_lpid < 0)
		goto out_free2;
	gp->shadow_lpid = shadow_lpid;
	gp->radix = 1;

	memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));

	return gp;

out_free2:
	pgd_free(kvm->mm, gp->shadow_pgtable);
out_free:
	kfree(gp);
	return NULL;
}

/*
 * Free up any resources allocated for a nested guest.
 */
static void kvmhv_release_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	if (gp->shadow_pgtable) {
		/*
		 * No vcpu is using this struct and no call to
		 * kvmhv_get_nested can find this struct,
		 * so we don't need to hold kvm->mmu_lock.
		 */
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		pgd_free(kvm->mm, gp->shadow_pgtable);
	}
	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
	kvmppc_free_lpid(gp->shadow_lpid);
	kfree(gp);
}

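/*
 * Remove a nested guest from the L1 lpid -> guest map, releasing it
 * once the last reference has been dropped.
 */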
static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	int lpid = gp->l1_lpid;
	long ref;

	spin_lock(&kvm->mmu_lock);
	if (gp == kvm->arch.nested_guests[lpid]) {
		kvm->arch.nested_guests[lpid] = NULL;
		if (lpid == kvm->arch.max_nested_lpid) {
			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
				;
			kvm->arch.max_nested_lpid = lpid;
		}
		--gp->refcnt;
	}
	ref = gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

/*
 * Free up all nested resources allocated for this guest.
 * This is called with no vcpus of the guest running, when
 * switching the guest to HPT mode or when destroying the
 * guest.
 */
void kvmhv_release_all_nested(struct kvm *kvm)
{
	int i;
	struct kvm_nested_guest *gp;
	struct kvm_nested_guest *freelist = NULL;
	struct kvm_memory_slot *memslot;
	int srcu_idx;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (!gp)
			continue;
		kvm->arch.nested_guests[i] = NULL;
		if (--gp->refcnt == 0) {
			gp->next = freelist;
			freelist = gp;
		}
	}
	kvm->arch.max_nested_lpid = -1;
	spin_unlock(&kvm->mmu_lock);
	while ((gp = freelist) != NULL) {
		freelist = gp->next;
		kvmhv_release_nested(gp);
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
		kvmhv_free_memslot_nest_rmap(memslot);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

/* caller must hold gp->tlb_lock */
static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	spin_lock(&kvm->mmu_lock);
	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
	spin_unlock(&kvm->mmu_lock);
	kvmhv_flush_lpid(gp->shadow_lpid);
	kvmhv_update_ptbl_cache(gp);
	if (gp->l1_gr_to_hr == 0)
		kvmhv_remove_nested(gp);
}

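/*
 * Look up the nested guest for an L1 lpid and take a reference on it,
 * optionally creating it if it does not exist yet.
 */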
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
					  bool create)
{
	struct kvm_nested_guest *gp, *newgp;

	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
		return NULL;

	spin_lock(&kvm->mmu_lock);
	gp = kvm->arch.nested_guests[l1_lpid];
	if (gp)
		++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (gp || !create)
		return gp;

	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
	if (!newgp)
		return NULL;
	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.nested_guests[l1_lpid]) {
		/* someone else beat us to it */
		gp = kvm->arch.nested_guests[l1_lpid];
	} else {
		kvm->arch.nested_guests[l1_lpid] = newgp;
		++newgp->refcnt;
		gp = newgp;
		newgp = NULL;
		if (l1_lpid > kvm->arch.max_nested_lpid)
			kvm->arch.max_nested_lpid = l1_lpid;
	}
	++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (newgp)
		kvmhv_release_nested(newgp);

	return gp;
}

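/* Drop a reference to a nested guest, releasing it on the last put. */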
void kvmhv_put_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	long ref;

	spin_lock(&kvm->mmu_lock);
	ref = --gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
{
	if (lpid > kvm->arch.max_nested_lpid)
		return NULL;
	return kvm->arch.nested_guests[lpid];
}

pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
				 unsigned long ea, unsigned *hshift)
{
	struct kvm_nested_guest *gp;
	pte_t *pte;

	gp = kvmhv_find_nested(kvm, lpid);
	if (!gp)
		return NULL;

	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
		"%s called with kvm mmu_lock not held\n", __func__);
	pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift);

	return pte;
}

static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
{
	return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
				      RMAP_NESTED_GPA_MASK));
}

void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
			    struct rmap_nested **n_rmap)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	u64 rmap, new_rmap = (*n_rmap)->rmap;

	/* Are there any existing entries? */
	if (!(*rmapp)) {
		/* No -> use the rmap as a single entry */
		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
		return;
	}

	/* Do any entries match what we're trying to insert? */
	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
			return;
	}

	/* Do we need to create a list or just add the new entry? */
	rmap = *rmapp;
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		*rmapp = 0UL;
	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		(*n_rmap)->list.next = (struct llist_node *) rmap;

	/* Set NULL so not freed by caller */
	*n_rmap = NULL;
}

static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
				      unsigned long clr, unsigned long set,
				      unsigned long hpa, unsigned long mask)
{
	unsigned long gpa;
	unsigned int shift, lpid;
	pte_t *ptep;

	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;

	/* Find the pte */
	ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
	/*
	 * If the pte is present and the pfn is still the same, update the pte.
	 * If the pfn has changed then this is a stale rmap entry, the nested
	 * gpa actually points somewhere else now, and there is nothing to do.
	 * XXX A future optimisation would be to remove the rmap entry here.
	 */
	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
		__radix_pte_update(ptep, clr, set);
		kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
	}
}

/*
 * For a given list of rmap entries, update the rc bits in all ptes in shadow
 * page tables for nested guests which are referenced by the rmap list.
 */
void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
				    unsigned long clr, unsigned long set,
				    unsigned long hpa, unsigned long nbytes)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	unsigned long rmap, mask;

	if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
		return;

	mask = PTE_RPN_MASK & ~(nbytes - 1);
	hpa &= mask;

	for_each_nest_rmap_safe(cursor, entry, &rmap)
		kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
}

static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
				   unsigned long hpa, unsigned long mask)
{
	struct kvm_nested_guest *gp;
	unsigned long gpa;
	unsigned int shift, lpid;
	pte_t *ptep;

	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
	gp = kvmhv_find_nested(kvm, lpid);
	if (!gp)
		return;

	/* Find and invalidate the pte */
	ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
	/* Don't spuriously invalidate ptes if the pfn has changed */
	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
}

static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
					unsigned long hpa, unsigned long mask)
{
	struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
	struct rmap_nested *cursor;
	unsigned long rmap;

	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
		kfree(cursor);
	}
}

/* called with kvm->mmu_lock held */
void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
				  const struct kvm_memory_slot *memslot,
				  unsigned long gpa, unsigned long hpa,
				  unsigned long nbytes)
{
	unsigned long gfn, end_gfn;
	unsigned long addr_mask;

	if (!memslot)
		return;
	gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
	end_gfn = gfn + (nbytes >> PAGE_SHIFT);

	addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
	hpa &= addr_mask;

	for (; gfn < end_gfn; gfn++) {
		unsigned long *rmap = &memslot->arch.rmap[gfn];
		kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
	}
}

static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
{
	unsigned long page;

	for (page = 0; page < free->npages; page++) {
		unsigned long rmap, *rmapp = &free->arch.rmap[page];
		struct rmap_nested *cursor;
		struct llist_node *entry;

		entry = llist_del_all((struct llist_head *) rmapp);
		for_each_nest_rmap_safe(cursor, entry, &rmap)
			kfree(cursor);
	}
}

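/*
 * Remove the shadow pte, if any, for a nested-guest real address.
 * Returns whether a pte was present, and the page shift of the
 * translation via *shift_ret.
 */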
static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
					struct kvm_nested_guest *gp,
					long gpa, int *shift_ret)
{
	struct kvm *kvm = vcpu->kvm;
	bool ret = false;
	pte_t *ptep;
	int shift;

	spin_lock(&kvm->mmu_lock);
	ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (ptep && pte_present(*ptep)) {
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
		ret = true;
	}
	spin_unlock(&kvm->mmu_lock);

	if (shift_ret)
		*shift_ret = shift;
	return ret;
}

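/* Decode helpers for the fields of a tlbie instruction and its RS/RB operands */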
static inline int get_ric(unsigned int instr)
{
	return (instr >> 18) & 0x3;
}

static inline int get_prs(unsigned int instr)
{
	return (instr >> 17) & 0x1;
}

static inline int get_r(unsigned int instr)
{
	return (instr >> 16) & 0x1;
}

static inline int get_lpid(unsigned long r_val)
{
	return r_val & 0xffffffff;
}

static inline int get_is(unsigned long r_val)
{
	return (r_val >> 10) & 0x3;
}

static inline int get_ap(unsigned long r_val)
{
	return (r_val >> 5) & 0x7;
}

static inline long get_epn(unsigned long r_val)
{
	return r_val >> 12;
}

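/*
 * Invalidate the shadow ptes backing a single guest tlbie target
 * address for the given actual page size (ap) encoding.
 */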
static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
					int ap, long epn)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	long npages;
	int shift, shadow_shift;
	unsigned long addr;

	shift = ap_to_shift(ap);
	addr = epn << 12;
	if (shift < 0)
		/* Invalid ap encoding */
		return -EINVAL;

	addr &= ~((1UL << shift) - 1);
	npages = 1UL << (shift - PAGE_SHIFT);

	gp = kvmhv_get_nested(kvm, lpid, false);
	if (!gp) /* No such guest -> nothing to do */
		return 0;
	mutex_lock(&gp->tlb_lock);

	/* There may be more than one host page backing this single guest pte */
	do {
		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);

		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
		addr += 1UL << shadow_shift;
	} while (npages > 0);

	mutex_unlock(&gp->tlb_lock);
	kvmhv_put_nested(gp);
	return 0;
}

static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
				     struct kvm_nested_guest *gp, int ric)
{
	struct kvm *kvm = vcpu->kvm;

	mutex_lock(&gp->tlb_lock);
	switch (ric) {
	case 0:
		/* Invalidate TLB */
		spin_lock(&kvm->mmu_lock);
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		kvmhv_flush_lpid(gp->shadow_lpid);
		spin_unlock(&kvm->mmu_lock);
		break;
	case 1:
		/*
		 * Invalidate PWC
		 * We don't cache this -> nothing to do
		 */
		break;
	case 2:
		/* Invalidate TLB, PWC and caching of partition table entries */
		kvmhv_flush_nested(gp);
		break;
	default:
		break;
	}
	mutex_unlock(&gp->tlb_lock);
}

static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int i;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (gp) {
			spin_unlock(&kvm->mmu_lock);
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			spin_lock(&kvm->mmu_lock);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
				    unsigned long rsval, unsigned long rbval)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int r, ric, prs, is, ap;
	int lpid;
	long epn;
	int ret = 0;

	ric = get_ric(instr);
	prs = get_prs(instr);
	r = get_r(instr);
	lpid = get_lpid(rsval);
	is = get_is(rbval);

	/*
	 * These cases are invalid and are not handled:
	 * r   != 1 -> Only radix supported
	 * prs == 1 -> Not HV privileged
	 * ric == 3 -> No cluster bombs for radix
	 * is  == 1 -> Partition scoped translations not associated with pid
	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
	 */
	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
	    ((!is) && (ric == 1 || ric == 2)))
		return -EINVAL;

	switch (is) {
	case 0:
		/*
		 * We know ric == 0
		 * Invalidate TLB for a given target address
		 */
		epn = get_epn(rbval);
		ap = get_ap(rbval);
		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
		break;
	case 2:
		/* Invalidate matching LPID */
		gp = kvmhv_get_nested(kvm, lpid, false);
		if (gp) {
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			kvmhv_put_nested(gp);
		}
		break;
	case 3:
		/* Invalidate ALL LPIDs */
		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * This handles the H_TLB_INVALIDATE hcall.
 * Parameters are (r4) tlbie instruction code, (r5) rS contents,
 * (r6) rB contents.
 */
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
{
	int ret;

	ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
			kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
	if (ret)
		return H_PARAMETER;
	return H_SUCCESS;
}

static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
					 unsigned long lpid, unsigned long ric)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;

	gp = kvmhv_get_nested(kvm, lpid, false);
	if (gp) {
		kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
		kvmhv_put_nested(gp);
	}
	return H_SUCCESS;
}

/*
 * Number of pages above which we invalidate the entire LPID rather than
 * flush individual pages.
 */
static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;

static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
					 unsigned long lpid,
					 unsigned long pg_sizes,
					 unsigned long start,
					 unsigned long end)
{
	int ret = H_P4;
	unsigned long addr, nr_pages;
	struct mmu_psize_def *def;
	unsigned long psize, ap, page_size;
	bool flush_lpid;

	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
		def = &mmu_psize_defs[psize];
		if (!(pg_sizes & def->h_rpt_pgsize))
			continue;

		nr_pages = (end - start) >> def->shift;
		flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
		if (flush_lpid)
			return do_tlb_invalidate_nested_all(vcpu, lpid,
							    RIC_FLUSH_TLB);
		addr = start;
		ap = mmu_get_ap(psize);
		page_size = 1UL << def->shift;
		do {
			ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
							   get_epn(addr));
			if (ret)
				return H_P4;
			addr += page_size;
		} while (addr < end);
	}
	return ret;
}

/*
 * Performs partition-scoped invalidations for nested guests
 * as part of H_RPT_INVALIDATE hcall.
 */
long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
			     unsigned long type, unsigned long pg_sizes,
			     unsigned long start, unsigned long end)
{
	/*
	 * If L2 lpid isn't valid, we need to return H_PARAMETER.
	 *
	 * However, nested KVM issues a L2 lpid flush call when creating
	 * partition table entries for L2. This happens even before the
	 * corresponding shadow lpid is created in HV which happens in
	 * H_ENTER_NESTED call. Since we can't differentiate this case from
	 * the invalid case, we ignore such flush requests and return success.
	 */
	if (!kvmhv_find_nested(vcpu->kvm, lpid))
		return H_SUCCESS;

	/*
	 * A flush all request can be handled by a full lpid flush only.
	 */
	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);

	/*
	 * We don't need to handle a PWC flush like process table here,
	 * because intermediate partition scoped table in nested guest doesn't
	 * really have PWC. Only level we have PWC is in L0 and for nested
	 * invalidate at L0 we always do kvm_flush_lpid() which does
	 * radix__flush_all_lpid(). For range invalidate at any level, we
	 * are not removing the higher level page tables and hence there is
	 * no PWC invalidate needed.
	 *
	 * if (type & H_RPTI_TYPE_PWC) {
	 *	ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
	 *	if (ret)
	 *		return H_P4;
	 * }
	 */

	if (start == 0 && end == -1)
		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);

	if (type & H_RPTI_TYPE_TLB)
		return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
						    start, end);
	return H_SUCCESS;
}

/* Used to convert a nested guest real address to a L1 guest real address */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa, unsigned long dsisr,
				       struct kvmppc_pte *gpte_p)
{
	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
	int ret;

	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
					 &fault_addr);

	if (ret) {
		/* We didn't find a pte */
		if (ret == -EINVAL) {
			/* Unsupported mmu config */
			flags |= DSISR_UNSUPP_MMU;
		} else if (ret == -ENOENT) {
			/* No translation found */
			flags |= DSISR_NOHPTE;
		} else if (ret == -EFAULT) {
			/* Couldn't access L1 real address */
			flags |= DSISR_PRTABLE_FAULT;
			vcpu->arch.fault_gpa = fault_addr;
		} else {
			/* Unknown error */
			return ret;
		}
		goto forward_to_l1;
	} else {
		/* We found a pte -> check permissions */
		if (dsisr & DSISR_ISSTORE) {
			/* Can we write? */
			if (!gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
			/* Can we execute? */
			if (!gpte_p->may_execute) {
				flags |= SRR1_ISI_N_G_OR_CIP;
				goto forward_to_l1;
			}
		} else {
			/* Can we read? */
			if (!gpte_p->may_read && !gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		}
	}

	return 0;

forward_to_l1:
	vcpu->arch.fault_dsisr = flags;
	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
		vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
		vcpu->arch.shregs.msr |= flags;
	}
	return RESUME_HOST;
}

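/*
 * Set the referenced/changed bits in both our pte for the L1 guest and
 * the shadow pte for the nested guest, provided the L1 partition-scoped
 * pte already has them set.
 */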
static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa,
				       struct kvmppc_pte gpte,
				       unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	u64 pgflags;
	long ret;

	/* Are the rc bits set in the L1 partition scoped pte? */
	pgflags = _PAGE_ACCESSED;
	if (writing)
		pgflags |= _PAGE_DIRTY;
	if (pgflags & ~gpte.rc)
		return RESUME_HOST;

	spin_lock(&kvm->mmu_lock);
	/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
	ret = kvmppc_hv_handle_set_rc(kvm, false, writing,
				      gpte.raddr, kvm->arch.lpid);
	if (!ret) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Set the rc bit in the pte of the shadow_pgtable for the nested guest */
	ret = kvmppc_hv_handle_set_rc(kvm, true, writing,
				      n_gpa, gp->l1_lpid);
	if (!ret)
		ret = -EINVAL;
	else
		ret = 0;

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	return ret;
}

static inline int kvmppc_radix_level_to_shift(int level)
{
	switch (level) {
	case 2:
		return PUD_SHIFT;
	case 1:
		return PMD_SHIFT;
	default:
		return PAGE_SHIFT;
	}
}

static inline int kvmppc_radix_shift_to_level(int shift)
{
	if (shift == PUD_SHIFT)
		return 2;
	if (shift == PMD_SHIFT)
		return 1;
	if (shift == PAGE_SHIFT)
		return 0;
	WARN_ON_ONCE(1);
	return 0;
}

/* called with gp->tlb_lock held */
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
					  struct kvm_nested_guest *gp)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	struct rmap_nested *n_rmap;
	struct kvmppc_pte gpte;
	pte_t pte, *pte_p;
	unsigned long mmu_seq;
	unsigned long dsisr = vcpu->arch.fault_dsisr;
	unsigned long ea = vcpu->arch.fault_dar;
	unsigned long *rmapp;
	unsigned long n_gpa, gpa, gfn, perm = 0UL;
	unsigned int shift, l1_shift, level;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	bool kvm_ro = false;
	long int ret;

	if (!gp->l1_gr_to_hr) {
		kvmhv_update_ptbl_cache(gp);
		if (!gp->l1_gr_to_hr)
			return RESUME_HOST;
	}

	/* Convert the nested guest real address into a L1 guest real address */

	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
	if (!(dsisr & DSISR_PRTABLE_FAULT))
		n_gpa |= ea & 0xFFF;
	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);

	/*
	 * If the hardware found a translation but we don't now have a usable
	 * translation in the l1 partition-scoped tree, remove the shadow pte
	 * and let the guest retry.
	 */
	if (ret == RESUME_HOST &&
	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
		      DSISR_BAD_COPYPASTE)))
		goto inval;
	if (ret)
		return ret;

	/* Failed to set the reference/change bits */
	if (dsisr & DSISR_SET_RC) {
		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
		if (ret == RESUME_HOST)
			return ret;
		if (ret)
			goto inval;
		dsisr &= ~DSISR_SET_RC;
		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
			       DSISR_PROTFAULT)))
			return RESUME_GUEST;
	}

	/*
	 * We took an HISI or HDSI while we were running a nested guest which
	 * means we have no partition scoped translation for that. This means
	 * we need to insert a pte for the mapping into our shadow_pgtable.
	 */

	l1_shift = gpte.page_shift;
	if (l1_shift < PAGE_SHIFT) {
		/* We don't support l1 using a page size smaller than our own */
		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
		       l1_shift, PAGE_SHIFT);
		return -EINVAL;
	}
	gpa = gpte.raddr;
	gfn = gpa >> PAGE_SHIFT;

	/* 1. Get the corresponding host memslot */

	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
			/* unusual error -> reflect to the guest as a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
			return RESUME_GUEST;
		}

		/* passthrough of emulated MMIO case */
		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
	}
	if (memslot->flags & KVM_MEM_READONLY) {
		if (writing) {
			/* Give the guest a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea,
					DSISR_ISSTORE | DSISR_PROTFAULT);
			return RESUME_GUEST;
		}
		kvm_ro = true;
	}

	/* 2. Find the host pte for this L1 guest real address */

	/* Used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* See if can find translation in our partition scoped tables for L1 */
	pte = __pte(0);
	spin_lock(&kvm->mmu_lock);
	pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (pte_p)
		pte = *pte_p;
	spin_unlock(&kvm->mmu_lock);

	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
		/* No suitable pte found -> try to insert a mapping */
		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
					writing, kvm_ro, &pte, &level);
		if (ret == -EAGAIN)
			return RESUME_GUEST;
		else if (ret)
			return ret;
		shift = kvmppc_radix_level_to_shift(level);
	}
	/* Align gfn to the start of the page */
	gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;

	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */

	/* The permissions are the combination of the host and l1 guest ptes */
	perm |= gpte.may_read ? 0UL : _PAGE_READ;
	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
	/* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
	perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
	perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
	pte = __pte(pte_val(pte) & ~perm);

	/* What size pte can we insert? */
	if (shift > l1_shift) {
		u64 mask;
		unsigned int actual_shift = PAGE_SHIFT;
		if (PMD_SHIFT < l1_shift)
			actual_shift = PMD_SHIFT;
		mask = (1UL << shift) - (1UL << actual_shift);
		pte = __pte(pte_val(pte) | (gpa & mask));
		shift = actual_shift;
	}
	level = kvmppc_radix_shift_to_level(shift);
	n_gpa &= ~((1UL << shift) - 1);

	/* 4. Insert the pte into our shadow_pgtable */

	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
	if (!n_rmap)
		return RESUME_GUEST; /* Let the guest try again */
	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
	kfree(n_rmap);
	if (ret == -EAGAIN)
		ret = RESUME_GUEST; /* Let the guest try again */

	return ret;

inval:
	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
	return RESUME_GUEST;
}

long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
{
	struct kvm_nested_guest *gp = vcpu->arch.nested;
	long int ret;

	mutex_lock(&gp->tlb_lock);
	ret = __kvmhv_nested_page_fault(vcpu, gp);
	mutex_unlock(&gp->tlb_lock);
	return ret;
}

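/* Return the next in-use L1 lpid after lpid, or -1 if there are no more. */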
int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
{
	int ret = -1;

	spin_lock(&kvm->mmu_lock);
	while (++lpid <= kvm->arch.max_nested_lpid) {
		if (kvm->arch.nested_guests[lpid]) {
			ret = lpid;
			break;
		}
	}
	spin_unlock(&kvm->mmu_lock);
	return ret;
}