]>
Commit | Line | Data |
---|---|---|
caab277b | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
37c43753 MZ |
2 | /* |
3 | * Copyright (C) 2012,2013 - ARM Ltd | |
4 | * Author: Marc Zyngier <marc.zyngier@arm.com> | |
37c43753 MZ |
5 | */ |
6 | ||
7 | #ifndef __ARM64_KVM_MMU_H__ | |
8 | #define __ARM64_KVM_MMU_H__ | |
9 | ||
10 | #include <asm/page.h> | |
11 | #include <asm/memory.h> | |
20475f78 | 12 | #include <asm/cpufeature.h> |
37c43753 MZ |
13 | |
14 | /* | |
cedbb8b7 | 15 | * As ARMv8.0 only has the TTBR0_EL2 register, we cannot express |
37c43753 MZ |
16 | * "negative" addresses. This makes it impossible to directly share |
17 | * mappings with the kernel. | |
18 | * | |
19 | * Instead, give the HYP mode its own VA region at a fixed offset from | |
20 | * the kernel by just masking the top bits (which are all ones for a | |
82a81bff | 21 | * kernel address). We need to find out how many bits to mask. |
cedbb8b7 | 22 | * |
82a81bff MZ |
23 | * We want to build a set of page tables that cover both parts of the |
24 | * idmap (the trampoline page used to initialize EL2), and our normal | |
25 | * runtime VA space, at the same time. | |
26 | * | |
27 | * Given that the kernel uses VA_BITS for its entire address space, | |
28 | * and that half of that space (VA_BITS - 1) is used for the linear | |
29 | * mapping, we can also limit the EL2 space to (VA_BITS - 1). | |
30 | * | |
31 | * The main question is "Within the VA_BITS space, does EL2 use the | |
32 | * top or the bottom half of that space to shadow the kernel's linear | |
33 | * mapping?". As we need to idmap the trampoline page, this is | |
34 | * determined by the range in which this page lives. | |
35 | * | |
36 | * If the page is in the bottom half, we have to use the top half. If | |
37 | * the page is in the top half, we have to use the bottom half: | |
38 | * | |
2077be67 | 39 | * T = __pa_symbol(__hyp_idmap_text_start) |
82a81bff MZ |
40 | * if (T & BIT(VA_BITS - 1)) |
41 | * HYP_VA_MIN = 0 //idmap in upper half | |
42 | * else | |
43 | * HYP_VA_MIN = 1 << (VA_BITS - 1) | |
44 | * HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1 | |
45 | * | |
46 | * This of course assumes that the trampoline page exists within the | |
47 | * VA_BITS range. If it doesn't, then it means we're in the odd case | |
48 | * where the kernel idmap (as well as HYP) uses more levels than the | |
49 | * kernel runtime page tables (as seen when the kernel is configured | |
50 | * for 4k pages, 39bits VA, and yet memory lives just above that | |
51 | * limit, forcing the idmap to use 4 levels of page tables while the | |
52 | * kernel itself only uses 3). In this particular case, it doesn't | |
53 | * matter which side of VA_BITS we use, as we're guaranteed not to | |
54 | * conflict with anything. | |
55 | * | |
56 | * When using VHE, there are no separate hyp mappings and all KVM | |
57 | * functionality is already mapped as part of the main kernel | |
58 | * mappings, and none of this applies in that case. | |
37c43753 | 59 | */ |
d53d9bc6 | 60 | |
37c43753 MZ |
61 | #ifdef __ASSEMBLY__ |
62 | ||
cedbb8b7 | 63 | #include <asm/alternative.h> |
cedbb8b7 | 64 | |
37c43753 MZ |
/*
 * Convert a kernel VA into a HYP VA.
 * reg: VA to be converted.
 *
 * The actual code generation takes place in kvm_update_va_mask, and
 * the instructions below are only there to reserve the space and
 * perform the register allocation (kvm_update_va_mask uses the
 * specific registers encoded in the instructions).
 */
.macro kern_hyp_va	reg
alternative_cb kvm_update_va_mask
	and     \reg, \reg, #1		/* mask with va_mask */
	ror	\reg, \reg, #1		/* rotate to the first tag bit */
	add	\reg, \reg, #0		/* insert the low 12 bits of the tag */
	add	\reg, \reg, #0, lsl 12	/* insert the top 12 bits of the tag */
	ror	\reg, \reg, #63		/* rotate back */
alternative_cb_end
.endm
83 | ||
84 | #else | |
85 | ||
65fddcfc | 86 | #include <linux/pgtable.h> |
38f791a4 | 87 | #include <asm/pgalloc.h> |
02f7760e | 88 | #include <asm/cache.h> |
37c43753 | 89 | #include <asm/cacheflush.h> |
e4c5a685 | 90 | #include <asm/mmu_context.h> |
37c43753 | 91 | |
2b4d1606 MZ |
/*
 * Alternative callback that patches the placeholder instruction
 * sequences (asm macro above, __kern_hyp_va below) once the final
 * HYP VA mask/tag have been computed.
 */
void kvm_update_va_mask(struct alt_instr *alt,
			__le32 *origptr, __le32 *updptr, int nr_inst);
void kvm_compute_layout(void);

/*
 * Convert a kernel VA into a HYP VA (C version).
 *
 * The five instructions in the inline asm are only placeholders:
 * kvm_update_va_mask rewrites them at boot with the actual mask and
 * tag values for this system's layout.
 */
static __always_inline unsigned long __kern_hyp_va(unsigned long v)
{
	asm volatile(ALTERNATIVE_CB("and %0, %0, #1\n"
				    "ror %0, %0, #1\n"
				    "add %0, %0, #0\n"
				    "add %0, %0, #0, lsl 12\n"
				    "ror %0, %0, #63\n",
				    kvm_update_va_mask)
		     : "+r" (v));
	return v;
}

/* Type-preserving wrapper around __kern_hyp_va(). */
#define kern_hyp_va(v) 	((typeof(v))(__kern_hyp_va((unsigned long)(v))))
37c43753 | 109 | |
44a497ab MZ |
/*
 * Obtain the PC-relative address of a kernel symbol
 * s: symbol
 *
 * The goal of this macro is to return a symbol's address based on a
 * PC-relative computation, as opposed to loading the VA from a
 * constant pool or something similar. This works well for HYP, as an
 * absolute VA is guaranteed to be wrong. Only use this if trying to
 * obtain the address of a symbol (i.e. not something you obtained by
 * following a pointer).
 */
#define hyp_symbol_addr(s)						\
	({								\
		typeof(s) *addr;					\
		asm("adrp	%0, %1\n"				\
		    "add	%0, %0, :lo12:%1\n"			\
		    : "=r" (addr) : "S" (&s));				\
		addr;							\
	})
129 | ||
/*
 * We currently support using a VM-specified IPA size. For backward
 * compatibility, the default IPA size is fixed to 40bits.
 */
#define KVM_PHYS_SHIFT	(40)

/* Per-VM IPA size/mask, derived from the VM's VTCR_EL2 value. */
#define kvm_phys_shift(kvm)		VTCR_EL2_IPA(kvm->arch.vtcr)
#define kvm_phys_size(kvm)		(_AC(1, ULL) << kvm_phys_shift(kvm))
#define kvm_phys_mask(kvm)		(kvm_phys_size(kvm) - _AC(1, ULL))
37c43753 | 139 | |
865b30cd SP |
/*
 * A page-table page is considered empty when only the reference taken
 * at allocation time remains (page_count == 1), i.e. no table entry
 * points into it any more.
 */
static inline bool kvm_page_empty(void *ptr)
{
	struct page *ptr_page = virt_to_page(ptr);
	return page_count(ptr_page) == 1;
}
37c43753 | 145 | |
c0ef6326 SP |
146 | #include <asm/stage2_pgtable.h> |
147 | ||
c8dddecd | 148 | int create_hyp_mappings(void *from, void *to, pgprot_t prot); |
807a3784 | 149 | int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, |
1bb32a44 MZ |
150 | void __iomem **kaddr, |
151 | void __iomem **haddr); | |
dc2e4633 MZ |
152 | int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, |
153 | void **haddr); | |
37c43753 MZ |
154 | void free_hyp_pgds(void); |
155 | ||
957db105 | 156 | void stage2_unmap_vm(struct kvm *kvm); |
37c43753 MZ |
157 | int kvm_alloc_stage2_pgd(struct kvm *kvm); |
158 | void kvm_free_stage2_pgd(struct kvm *kvm); | |
159 | int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, | |
c40f2f8f | 160 | phys_addr_t pa, unsigned long size, bool writable); |
37c43753 MZ |
161 | |
162 | int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); | |
163 | ||
164 | void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); | |
165 | ||
166 | phys_addr_t kvm_mmu_get_httbr(void); | |
37c43753 MZ |
167 | phys_addr_t kvm_get_idmap_vector(void); |
168 | int kvm_mmu_init(void); | |
169 | void kvm_clear_hyp_idmap(void); | |
170 | ||
0db9dd8a MZ |
/* Build table descriptors pointing at the next (lower) level of table. */
#define kvm_mk_pmd(ptep)					\
	__pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE)
#define kvm_mk_pud(pmdp)					\
	__pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE)
#define kvm_mk_p4d(pmdp)					\
	__p4d(__phys_to_p4d_val(__pa(pmdp)) | PUD_TYPE_TABLE)

#define kvm_set_pud(pudp, pud)		set_pud(pudp, pud)

/* Construct leaf (page/block) entries from a pfn and a pgprot. */
#define kvm_pfn_pte(pfn, prot)		pfn_pte(pfn, prot)
#define kvm_pfn_pmd(pfn, prot)		pfn_pmd(pfn, prot)
#define kvm_pfn_pud(pfn, prot)		pfn_pud(pfn, prot)

#define kvm_pud_pfn(pud)		pud_pfn(pud)

#define kvm_pmd_mkhuge(pmd)		pmd_mkhuge(pmd)
#define kvm_pud_mkhuge(pud)		pud_mkhuge(pud)
f8df7338 | 188 | |
/* Make a stage-2 PTE/PMD/PUD writable by setting the S2 RDWR bits. */
static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
{
	pte_val(pte) |= PTE_S2_RDWR;
	return pte;
}

static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
{
	pmd_val(pmd) |= PMD_S2_RDWR;
	return pmd;
}

static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
{
	pud_val(pud) |= PUD_S2_RDWR;
	return pud;
}
206 | ||
d0e22b4a MZ |
/* Make a stage-2 PTE/PMD/PUD executable by clearing the S2 XN bit. */
static inline pte_t kvm_s2pte_mkexec(pte_t pte)
{
	pte_val(pte) &= ~PTE_S2_XN;
	return pte;
}

static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
{
	pmd_val(pmd) &= ~PMD_S2_XN;
	return pmd;
}

static inline pud_t kvm_s2pud_mkexec(pud_t pud)
{
	pud_val(pud) &= ~PUD_S2_XN;
	return pud;
}
224 | ||
/*
 * Clear the write permission of a live stage-2 PTE.
 *
 * Done with a cmpxchg loop so that concurrent hardware updates to the
 * same descriptor (e.g. access/dirty bit updates) are not lost.
 */
static inline void kvm_set_s2pte_readonly(pte_t *ptep)
{
	pteval_t old_pteval, pteval;

	pteval = READ_ONCE(pte_val(*ptep));
	do {
		old_pteval = pteval;
		pteval &= ~PTE_S2_RDWR;
		pteval |= PTE_S2_RDONLY;
		pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
	} while (pteval != old_pteval);
}

static inline bool kvm_s2pte_readonly(pte_t *ptep)
{
	return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
}

static inline bool kvm_s2pte_exec(pte_t *ptep)
{
	return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
}

/* The PMD/PUD read-only helpers simply reuse the PTE version via a cast. */
static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
{
	kvm_set_s2pte_readonly((pte_t *)pmdp);
}

static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
{
	return kvm_s2pte_readonly((pte_t *)pmdp);
}

static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
{
	return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
}

static inline void kvm_set_s2pud_readonly(pud_t *pudp)
{
	kvm_set_s2pte_readonly((pte_t *)pudp);
}

static inline bool kvm_s2pud_readonly(pud_t *pudp)
{
	return kvm_s2pte_readonly((pte_t *)pudp);
}

static inline bool kvm_s2pud_exec(pud_t *pudp)
{
	return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN);
}
277 | ||
eb3f0624 PA |
/* Accessed ("young") bit handling for stage-2 PUD entries. */
static inline pud_t kvm_s2pud_mkyoung(pud_t pud)
{
	return pud_mkyoung(pud);
}

static inline bool kvm_s2pud_young(pud_t pud)
{
	return pud_young(pud);
}
287 | ||
/*
 * Emptiness checks for HYP page-table pages. When a level is folded
 * away by the configuration there is never a dedicated table page to
 * free, so the corresponding check is hardwired to 0.
 */
#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep)

#ifdef __PAGETABLE_PMD_FOLDED
#define hyp_pmd_table_empty(pmdp) (0)
#else
#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp)
#endif

#ifdef __PAGETABLE_PUD_FOLDED
#define hyp_pud_table_empty(pudp) (0)
#else
#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp)
#endif

#ifdef __PAGETABLE_P4D_FOLDED
#define hyp_p4d_table_empty(p4dp) (0)
#else
#define hyp_p4d_table_empty(p4dp) kvm_page_empty(p4dp)
#endif
307 | ||
37c43753 MZ |
308 | struct kvm; |
309 | ||
2d58b733 MZ |
310 | #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) |
311 | ||
312 | static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) | |
37c43753 | 313 | { |
8d404c4c | 314 | return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; |
2d58b733 MZ |
315 | } |
316 | ||
/* Clean @size bytes of the page at @pfn to the PoC (unless FWB applies). */
static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
{
	void *va = page_address(pfn_to_page(pfn));

	/*
	 * With FWB, we ensure that the guest always accesses memory using
	 * cacheable attributes, and we don't have to clean to PoC when
	 * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
	 * PoU is not required either in this case.
	 */
	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
		return;

	kvm_flush_dcache_to_poc(va, size);
}
2d58b733 | 332 | |
/*
 * Invalidate the instruction cache for a guest page. Aliasing VIPT
 * caches need a full I-cache flush; otherwise a ranged invalidate of
 * the page's linear-map alias is sufficient.
 */
static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
						  unsigned long size)
{
	if (icache_is_aliasing()) {
		/* any kind of VIPT cache */
		__flush_icache_all();
	} else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) {
		/* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */
		void *va = page_address(pfn_to_page(pfn));

		invalidate_icache_range((unsigned long)va,
					(unsigned long)va + size);
	}
}
347 | ||
363ef89f MZ |
/*
 * Flush to PoC the memory covered by a stage-2 leaf entry (page, PMD
 * block or PUD block). Skipped entirely when FWB guarantees guest
 * accesses are always cacheable.
 */
static inline void __kvm_flush_dcache_pte(pte_t pte)
{
	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
		struct page *page = pte_page(pte);
		kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
	}
}

static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
{
	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
		struct page *page = pmd_page(pmd);
		kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
	}
}

static inline void __kvm_flush_dcache_pud(pud_t pud)
{
	if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
		struct page *page = pud_page(pud);
		kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
	}
}
371 | ||
3c1e7165 MZ |
372 | void kvm_set_way_flush(struct kvm_vcpu *vcpu); |
373 | void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); | |
9d218a1f | 374 | |
e4c5a685 AB |
/* True when the idmap needs an extra translation level (extended idmap). */
static inline bool __kvm_cpu_uses_extended_idmap(void)
{
	return __cpu_uses_extended_idmap_level();
}

/* Number of PGD pointers used by the idmap's top level. */
static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
{
	return idmap_ptrs_per_pgd;
}
384 | ||
19338304 KM |
/*
 * Can't use pgd_populate here, because the extended idmap adds an extra level
 * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended
 * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4.
 */
static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
				       pgd_t *hyp_pgd,
				       pgd_t *merged_hyp_pgd,
				       unsigned long hyp_idmap_start)
{
	int idmap_idx;
	u64	pgd_addr;

	/*
	 * Use the first entry to access the HYP mappings. It is
	 * guaranteed to be free, otherwise we wouldn't use an
	 * extended idmap.
	 */
	VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
	pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd));
	merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE);

	/*
	 * Create another extended level entry that points to the boot HYP map,
	 * which contains an ID mapping of the HYP init code. We essentially
	 * merge the boot and runtime HYP maps by doing so, but they don't
	 * overlap anyway, so this is fine.
	 */
	idmap_idx = hyp_idmap_start >> VA_BITS;
	VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
	pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd));
	merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE);
}
418 | ||
20475f78 VM |
/* Number of VMID bits supported, read from the sanitised ID_AA64MMFR1_EL1. */
static inline unsigned int kvm_get_vmid_bits(void)
{
	int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);

	return get_vmid_bits(reg);
}
425 | ||
bf308242 AP |
/*
 * We are not in the kvm->srcu critical section most of the time, so we take
 * the SRCU read lock here. Since we copy the data from the user page, we
 * can immediately drop the lock again.
 */
static inline int kvm_read_guest_lock(struct kvm *kvm,
				      gpa_t gpa, void *data, unsigned long len)
{
	int srcu_idx = srcu_read_lock(&kvm->srcu);
	int ret = kvm_read_guest(kvm, gpa, data, len);

	srcu_read_unlock(&kvm->srcu, srcu_idx);

	return ret;
}
441 | ||
a6ecfb11 MZ |
/* Write to guest memory under the kvm->srcu read lock (see read variant). */
static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa,
				       const void *data, unsigned long len)
{
	int srcu_idx = srcu_read_lock(&kvm->srcu);
	int ret = kvm_write_guest(kvm, gpa, data, len);

	srcu_read_unlock(&kvm->srcu, srcu_idx);

	return ret;
}
452 | ||
dee39247 MZ |
#ifdef CONFIG_KVM_INDIRECT_VECTORS
/*
 * EL2 vectors can be mapped and rerouted in a number of ways,
 * depending on the kernel configuration and CPU present:
 *
 * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the
 *   hardening sequence is placed in one of the vector slots, which is
 *   executed before jumping to the real vectors.
 *
 * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the
 *   ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the
 *   hardening sequence is mapped next to the idmap page, and executed
 *   before jumping to the real vectors.
 *
 * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an
 *   empty slot is selected, mapped next to the idmap page, and
 *   executed before jumping to the real vectors.
 *
 * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with
 * VHE, as we don't have hypervisor-specific mappings. If the system
 * is VHE and yet selects this capability, it will be ignored.
 */
#include <asm/mmu.h>

extern void *__kvm_bp_vect_base;
extern int __kvm_harden_el2_vector_slot;

/* This is called on both VHE and !VHE systems */
static inline void *kvm_get_hyp_vector(void)
{
	struct bp_hardening_data *data = arm64_get_bp_hardening_data();
	void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
	int slot = -1;

	if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) {
		vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs));
		slot = data->hyp_vectors_slot;
	}

	if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) {
		vect = __kvm_bp_vect_base;
		if (slot == -1)
			slot = __kvm_harden_el2_vector_slot;
	}

	/* Each vector slot is 2K; index into the selected set of vectors. */
	if (slot != -1)
		vect += slot * SZ_2K;

	return vect;
}
503 | ||
/* This is only called on a !VHE system */
static inline int kvm_map_vectors(void)
{
	/*
	 * HBP  = ARM64_HARDEN_BRANCH_PREDICTOR
	 * HEL2 = ARM64_HARDEN_EL2_VECTORS
	 *
	 * !HBP + !HEL2 -> use direct vectors
	 *  HBP + !HEL2 -> use hardened vectors in place
	 * !HBP +  HEL2 -> allocate one vector slot and use exec mapping
	 *  HBP +  HEL2 -> use hardened vectors and use exec mapping
	 */
	if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) {
		__kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs);
		__kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base);
	}

	if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) {
		phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs);
		unsigned long size = __BP_HARDEN_HYP_VECS_SZ;

		/*
		 * Always allocate a spare vector slot, as we don't
		 * know yet which CPUs have a BP hardening slot that
		 * we can reuse.
		 */
		__kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot);
		BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS);
		return create_hyp_exec_mappings(vect_pa, size,
						&__kvm_bp_vect_base);
	}

	return 0;
}
6840bdd7 MZ |
#else
/* No indirect vectors: always use the canonical HYP vector page. */
static inline void *kvm_get_hyp_vector(void)
{
	return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector));
}

static inline int kvm_map_vectors(void)
{
	return 0;
}
#endif
549 | ||
55e3748e MZ |
#ifdef CONFIG_ARM64_SSBD
DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);

/*
 * Map each CPU's arm64_ssbd_callback_required flag into HYP.
 * Returns 0 on success or the error from create_hyp_mappings().
 */
static inline int hyp_map_aux_data(void)
{
	int cpu, err;

	for_each_possible_cpu(cpu) {
		u64 *ptr;

		ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu);
		err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP);
		if (err)
			return err;
	}
	return 0;
}
#else
static inline int hyp_map_aux_data(void)
{
	return 0;
}
#endif
573 | ||
529c4b05 KM |
#define kvm_phys_to_vttbr(addr)		phys_to_ttbr(addr)

/*
 * Get the magic number 'x' for VTTBR:BADDR of this KVM instance.
 * With v8.2 LVA extensions, 'x' should be a minimum of 6 with
 * 52bit IPS.
 */
static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels)
{
	int x = ARM64_VTTBR_X(ipa_shift, levels);

	return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x;
}

/* Mask selecting the BADDR field bits [PHYS_MASK_SHIFT-1 : x] of VTTBR. */
static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels)
{
	unsigned int x = arm64_vttbr_x(ipa_shift, levels);

	return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x);
}

static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
{
	return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
}
599 | ||
/* Compose the VTTBR_EL2 value: stage-2 pgd address | VMID | CnP bit. */
static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
{
	struct kvm_vmid *vmid = &kvm->arch.vmid;
	u64 vmid_field, baddr;
	u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;

	baddr = kvm->arch.pgd_phys;
	vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
	return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
610 | ||
fe677be9 MZ |
/*
 * Must be called from hyp code running at EL2 with an updated VTTBR
 * and interrupts disabled.
 */
static __always_inline void __load_guest_stage2(struct kvm *kvm)
{
	write_sysreg(kvm->arch.vtcr, vtcr_el2);
	write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);

	/*
	 * ARM errata 1165522 and 1530923 require the actual execution of the
	 * above before we can switch to the EL1/EL0 translation regime used by
	 * the guest.
	 */
	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT));
}
627 | ||
37c43753 MZ |
628 | #endif /* __ASSEMBLY__ */ |
629 | #endif /* __ARM64_KVM_MMU_H__ */ |