/*
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/mman.h>
#include <linux/kvm_host.h>
#include <linux/io.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mmio.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>

#include "trace.h"

extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];

static DEFINE_MUTEX(kvm_hyp_pgd_mutex);

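/*
 * Flush the stage-2 TLB entries for this VM by issuing the
 * __kvm_tlb_flush_vmid hypercall into Hyp mode.
 */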
static void kvm_tlb_flush_vmid(struct kvm *kvm)
{
        kvm_call_hyp(__kvm_tlb_flush_vmid, kvm);
}

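/*
 * Pre-fill @cache with pages for later page-table allocations. Ensures at
 * least @min objects are available (topping up to @max), so stage-2 fault
 * handling can install mappings without allocating while holding the
 * mmu_lock spinlock. Returns 0 on success or -ENOMEM on allocation failure.
 */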
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
                                  int min, int max)
{
        void *page;

        BUG_ON(max > KVM_NR_MEM_OBJS);
        if (cache->nobjs >= min)
                return 0;
        while (cache->nobjs < max) {
                page = (void *)__get_free_page(PGALLOC_GFP);
                if (!page)
                        return -ENOMEM;
                cache->objects[cache->nobjs++] = page;
        }
        return 0;
}

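/* Release any pages still held in the memory cache. */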
static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
{
        while (mc->nobjs)
                free_page((unsigned long)mc->objects[--mc->nobjs]);
}

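/*
 * Pop one pre-allocated page from the cache. The cache must have been
 * topped up beforehand; an empty cache here is a bug.
 */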
static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
{
        void *p;

        BUG_ON(!mc || !mc->nobjs);
        p = mc->objects[--mc->nobjs];
        return p;
}

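/*
 * Free the level-3 (pte) tables hanging off each table entry of a Hyp-mode
 * level-2 (pmd) table. The pmd entries themselves are left untouched; the
 * caller frees the pmd page afterwards.
 */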
static void free_ptes(pmd_t *pmd, unsigned long addr)
{
        pte_t *pte;
        unsigned int i;

        for (i = 0; i < PTRS_PER_PMD; i++, addr += PMD_SIZE) {
                if (!pmd_none(*pmd) && pmd_table(*pmd)) {
                        pte = pte_offset_kernel(pmd, addr);
                        pte_free_kernel(NULL, pte);
                }
                pmd++;
        }
}

/**
 * free_hyp_pmds - free Hyp-mode level-2 tables and child level-3 tables
 *
 * Assumes this is a page table used strictly in Hyp-mode and therefore contains
 * only mappings in the kernel memory area, which is above PAGE_OFFSET.
 */
void free_hyp_pmds(void)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        unsigned long addr;

        mutex_lock(&kvm_hyp_pgd_mutex);
        for (addr = PAGE_OFFSET; addr != 0; addr += PGDIR_SIZE) {
                unsigned long hyp_addr = KERN_TO_HYP(addr);
                pgd = hyp_pgd + pgd_index(hyp_addr);
                pud = pud_offset(pgd, hyp_addr);

                if (pud_none(*pud))
                        continue;
                BUG_ON(pud_bad(*pud));

                pmd = pmd_offset(pud, hyp_addr);
                free_ptes(pmd, addr);
                pmd_free(NULL, pmd);
                pud_clear(pud);
        }
        mutex_unlock(&kvm_hyp_pgd_mutex);
}

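/*
 * Install PAGE_HYP ptes for the kernel VA range [start, end), pointing at
 * the same physical pages as the existing kernel mapping. The pmd entries
 * covering the range must already be populated.
 */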
static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
                                    unsigned long end)
{
        pte_t *pte;
        unsigned long addr;
        struct page *page;

        for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
                unsigned long hyp_addr = KERN_TO_HYP(addr);

                pte = pte_offset_kernel(pmd, hyp_addr);
                BUG_ON(!virt_addr_valid(addr));
                page = virt_to_page(addr);
                kvm_set_pte(pte, mk_pte(page, PAGE_HYP));
        }
}

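/*
 * Install PAGE_HYP_DEVICE ptes for the kernel VA range [start, end), mapping
 * consecutive physical pages starting at *pfn_base and advancing the pfn as
 * each page is mapped.
 */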
static void create_hyp_io_pte_mappings(pmd_t *pmd, unsigned long start,
                                       unsigned long end,
                                       unsigned long *pfn_base)
{
        pte_t *pte;
        unsigned long addr;

        for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
                unsigned long hyp_addr = KERN_TO_HYP(addr);

                pte = pte_offset_kernel(pmd, hyp_addr);
                BUG_ON(pfn_valid(*pfn_base));
                kvm_set_pte(pte, pfn_pte(*pfn_base, PAGE_HYP_DEVICE));
                (*pfn_base)++;
        }
}

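/*
 * Walk the level-2 (pmd) entries covering [start, end), allocating missing
 * pte tables, and hand each pmd-sized chunk to the pte-level mappers above.
 * A NULL @pfn_base selects a normal kernel mapping, otherwise an I/O mapping.
 */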
static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
                                   unsigned long end, unsigned long *pfn_base)
{
        pmd_t *pmd;
        pte_t *pte;
        unsigned long addr, next;

        for (addr = start; addr < end; addr = next) {
                unsigned long hyp_addr = KERN_TO_HYP(addr);
                pmd = pmd_offset(pud, hyp_addr);

                BUG_ON(pmd_sect(*pmd));

                if (pmd_none(*pmd)) {
                        pte = pte_alloc_one_kernel(NULL, hyp_addr);
                        if (!pte) {
                                kvm_err("Cannot allocate Hyp pte\n");
                                return -ENOMEM;
                        }
                        pmd_populate_kernel(NULL, pmd, pte);
                }

                next = pmd_addr_end(addr, end);

                /*
                 * If pfn_base is NULL, we map kernel pages into HYP with the
                 * virtual address. Otherwise, this is considered an I/O
                 * mapping and we map the physical region starting at
                 * *pfn_base to [start, end[.
                 */
                if (!pfn_base)
                        create_hyp_pte_mappings(pmd, addr, next);
                else
                        create_hyp_io_pte_mappings(pmd, addr, next, pfn_base);
        }

        return 0;
}

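/*
 * Common worker for create_hyp_mappings() and create_hyp_io_mappings():
 * validate the kernel VA range, then walk the Hyp pgd under
 * kvm_hyp_pgd_mutex, allocating intermediate tables as needed, and fill in
 * the ptes via create_hyp_pmd_mappings().
 */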
static int __create_hyp_mappings(void *from, void *to, unsigned long *pfn_base)
{
        unsigned long start = (unsigned long)from;
        unsigned long end = (unsigned long)to;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        unsigned long addr, next;
        int err = 0;

        if (start >= end)
                return -EINVAL;
        /* Check for a valid kernel memory mapping */
        if (!pfn_base && (!virt_addr_valid(from) || !virt_addr_valid(to - 1)))
                return -EINVAL;
        /* Check for a valid kernel IO mapping */
        if (pfn_base && (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1)))
                return -EINVAL;

        mutex_lock(&kvm_hyp_pgd_mutex);
        for (addr = start; addr < end; addr = next) {
                unsigned long hyp_addr = KERN_TO_HYP(addr);
                pgd = hyp_pgd + pgd_index(hyp_addr);
                pud = pud_offset(pgd, hyp_addr);

                if (pud_none_or_clear_bad(pud)) {
                        pmd = pmd_alloc_one(NULL, hyp_addr);
                        if (!pmd) {
                                kvm_err("Cannot allocate Hyp pmd\n");
                                err = -ENOMEM;
                                goto out;
                        }
                        pud_populate(NULL, pud, pmd);
                }

                next = pgd_addr_end(addr, end);
                err = create_hyp_pmd_mappings(pud, addr, next, pfn_base);
                if (err)
                        goto out;
        }
out:
        mutex_unlock(&kvm_hyp_pgd_mutex);
        return err;
}

/**
 * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode
 * @from:  The virtual kernel start address of the range
 * @to:    The virtual kernel end address of the range (exclusive)
 *
 * The Hyp-mode mapping uses the same virtual addresses as the kernel mapping
 * (modulo HYP_PAGE_OFFSET) and points to the same underlying physical pages.
 *
 * Note: Wrapping around zero in the "to" address is not supported.
 */
int create_hyp_mappings(void *from, void *to)
{
        return __create_hyp_mappings(from, to, NULL);
}

/**
 * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
 * @from:  The kernel start VA of the range
 * @to:    The kernel end VA of the range (exclusive)
 * @addr:  The physical start address which gets mapped
 *
 * The resulting HYP VA is the same as the kernel VA, modulo
 * HYP_PAGE_OFFSET.
 */
int create_hyp_io_mappings(void *from, void *to, phys_addr_t addr)
{
        unsigned long pfn = __phys_to_pfn(addr);
        return __create_hyp_mappings(from, to, &pfn);
}

/**
 * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
 * @kvm:   The KVM struct pointer for the VM.
 *
 * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can
 * support either full 40-bit input addresses or limited to 32-bit input
 * addresses). Clears the allocated pages.
 *
 * Note we don't need locking here as this is only called when the VM is
 * created, which can only be done once.
 */
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
        pgd_t *pgd;

        if (kvm->arch.pgd != NULL) {
                kvm_err("kvm_arch already initialized?\n");
                return -EINVAL;
        }

        pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
        if (!pgd)
                return -ENOMEM;

        /* stage-2 pgd must be aligned to its size */
        VM_BUG_ON((unsigned long)pgd & (S2_PGD_SIZE - 1));

        memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
        kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;

        return 0;
}

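/*
 * Stage-2 teardown helpers. Each stage-2 table page's reference count tracks
 * the number of entries installed in it (plus the base reference), so a page
 * count of 1 means the table is empty and can be freed by the walker in
 * unmap_stage2_range().
 */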
static void clear_pud_entry(pud_t *pud)
{
        pmd_t *pmd_table = pmd_offset(pud, 0);
        pud_clear(pud);
        pmd_free(NULL, pmd_table);
        put_page(virt_to_page(pud));
}

static void clear_pmd_entry(pmd_t *pmd)
{
        pte_t *pte_table = pte_offset_kernel(pmd, 0);
        pmd_clear(pmd);
        pte_free_kernel(NULL, pte_table);
        put_page(virt_to_page(pmd));
}

static bool pmd_empty(pmd_t *pmd)
{
        struct page *pmd_page = virt_to_page(pmd);
        return page_count(pmd_page) == 1;
}

static void clear_pte_entry(pte_t *pte)
{
        if (pte_present(*pte)) {
                kvm_set_pte(pte, __pte(0));
                put_page(virt_to_page(pte));
        }
}

static bool pte_empty(pte_t *pte)
{
        struct page *pte_page = virt_to_page(pte);
        return page_count(pte_page) == 1;
}

/**
 * unmap_stage2_range -- Clear stage2 page table entries to unmap a range
 * @kvm:   The VM pointer
 * @start: The intermediate physical base address of the range to unmap
 * @size:  The size of the area to unmap
 *
 * Clear a range of stage-2 mappings, lowering the various ref-counts. Must
 * be called while holding mmu_lock (unless for freeing the stage2 pgd before
 * destroying the VM), otherwise another faulting VCPU may come in and mess
 * with things behind our backs.
 */
static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        phys_addr_t addr = start, end = start + size;
        u64 range;

        while (addr < end) {
                pgd = kvm->arch.pgd + pgd_index(addr);
                pud = pud_offset(pgd, addr);
                if (pud_none(*pud)) {
                        addr += PUD_SIZE;
                        continue;
                }

                pmd = pmd_offset(pud, addr);
                if (pmd_none(*pmd)) {
                        addr += PMD_SIZE;
                        continue;
                }

                pte = pte_offset_kernel(pmd, addr);
                clear_pte_entry(pte);
                range = PAGE_SIZE;

                /* If we emptied the pte, walk back up the ladder */
                if (pte_empty(pte)) {
                        clear_pmd_entry(pmd);
                        range = PMD_SIZE;
                        if (pmd_empty(pmd)) {
                                clear_pud_entry(pud);
                                range = PUD_SIZE;
                        }
                }

                addr += range;
        }
}

/**
 * kvm_free_stage2_pgd - free all stage-2 tables
 * @kvm:   The KVM struct pointer for the VM.
 *
 * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all
 * underlying level-2 and level-3 tables before freeing the actual level-1 table
 * and setting the struct pointer to NULL.
 *
 * Note we don't need locking here as this is only called when the VM is
 * destroyed, which can only be done once.
 */
void kvm_free_stage2_pgd(struct kvm *kvm)
{
        if (kvm->arch.pgd == NULL)
                return;

        unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
        free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
        kvm->arch.pgd = NULL;
}

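/*
 * Install @new_pte in the stage-2 tables at IPA @addr, allocating missing
 * intermediate tables from @cache. A NULL cache (the kvm_set_spte_hva path)
 * means missing tables are simply skipped. For I/O mappings (@iomap), an
 * already-present pte is treated as an error (-EFAULT); otherwise an existing
 * mapping is replaced and the VM's stage-2 TLB entries are flushed.
 */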
static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
                          phys_addr_t addr, const pte_t *new_pte, bool iomap)
{
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte, old_pte;

        /* Create 2nd stage page table mapping - Level 1 */
        pgd = kvm->arch.pgd + pgd_index(addr);
        pud = pud_offset(pgd, addr);
        if (pud_none(*pud)) {
                if (!cache)
                        return 0; /* ignore calls from kvm_set_spte_hva */
                pmd = mmu_memory_cache_alloc(cache);
                pud_populate(NULL, pud, pmd);
                get_page(virt_to_page(pud));
        }

        pmd = pmd_offset(pud, addr);

        /* Create 2nd stage page table mapping - Level 2 */
        if (pmd_none(*pmd)) {
                if (!cache)
                        return 0; /* ignore calls from kvm_set_spte_hva */
                pte = mmu_memory_cache_alloc(cache);
                kvm_clean_pte(pte);
                pmd_populate_kernel(NULL, pmd, pte);
                get_page(virt_to_page(pmd));
        }

        pte = pte_offset_kernel(pmd, addr);

        if (iomap && pte_present(*pte))
                return -EFAULT;

        /* Create 2nd stage page table mapping - Level 3 */
        old_pte = *pte;
        kvm_set_pte(pte, *new_pte);
        if (pte_present(old_pte))
                kvm_tlb_flush_vmid(kvm);
        else
                get_page(virt_to_page(pte));

        return 0;
}

/**
 * kvm_phys_addr_ioremap - map a device range to guest IPA
 *
 * @kvm:       The KVM pointer
 * @guest_ipa: The IPA at which to insert the mapping
 * @pa:        The physical address of the device
 * @size:      The size of the mapping
 */
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
                          phys_addr_t pa, unsigned long size)
{
        phys_addr_t addr, end;
        int ret = 0;
        unsigned long pfn;
        struct kvm_mmu_memory_cache cache = { 0, };

        end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK;
        pfn = __phys_to_pfn(pa);

        for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
                pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
                kvm_set_s2pte_writable(&pte);

                ret = mmu_topup_memory_cache(&cache, 2, 2);
                if (ret)
                        goto out;
                spin_lock(&kvm->mmu_lock);
                ret = stage2_set_pte(kvm, &cache, addr, &pte, true);
                spin_unlock(&kvm->mmu_lock);
                if (ret)
                        goto out;

                pfn++;
        }

out:
        mmu_free_memory_cache(&cache);
        return ret;
}

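/*
 * Handle a stage-2 fault on an IPA backed by a memslot: resolve the gfn to a
 * host pfn with gfn_to_pfn_prot(), then install a stage-2 pte for it under
 * mmu_lock, re-checking the MMU notifier sequence count so we do not race
 * with a concurrent unmap of the backing page.
 */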
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                          gfn_t gfn, struct kvm_memory_slot *memslot,
                          unsigned long fault_status)
{
        pte_t new_pte;
        pfn_t pfn;
        int ret;
        bool write_fault, writable;
        unsigned long mmu_seq;
        struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;

        write_fault = kvm_is_write_fault(kvm_vcpu_get_hsr(vcpu));
        if (fault_status == FSC_PERM && !write_fault) {
                kvm_err("Unexpected L2 read permission error\n");
                return -EFAULT;
        }

        /* We need minimum second+third level pages */
        ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
        if (ret)
                return ret;

        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        /*
         * Ensure the read of mmu_notifier_seq happens before we call
         * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
         * the page we just got a reference to getting unmapped before we have
         * a chance to grab the mmu_lock, which ensures that if the page gets
         * unmapped afterwards, the call to kvm_unmap_hva will take it away
         * from us again properly. This smp_rmb() interacts with the smp_wmb()
         * in kvm_mmu_notifier_invalidate_<page|range_end>.
         */
        smp_rmb();

        pfn = gfn_to_pfn_prot(vcpu->kvm, gfn, write_fault, &writable);
        if (is_error_pfn(pfn))
                return -EFAULT;

        new_pte = pfn_pte(pfn, PAGE_S2);
        coherent_icache_guest_page(vcpu->kvm, gfn);

        spin_lock(&vcpu->kvm->mmu_lock);
        if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
                goto out_unlock;
        if (writable) {
                kvm_set_s2pte_writable(&new_pte);
                kvm_set_pfn_dirty(pfn);
        }
        stage2_set_pte(vcpu->kvm, memcache, fault_ipa, &new_pte, false);

out_unlock:
        spin_unlock(&vcpu->kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
        return 0;
}

/**
 * kvm_handle_guest_abort - handles all 2nd stage aborts
 * @vcpu:  the VCPU pointer
 * @run:   the kvm_run structure
 *
 * Any abort that gets to the host is almost guaranteed to be caused by a
 * missing second stage translation table entry. Either the guest simply needs
 * more memory and we must allocate an appropriate page, or the guest tried to
 * access I/O memory, which is emulated by user space. The distinction is based
 * on the IPA causing the fault and whether this memory region has been
 * registered as standard RAM by user space.
 */
int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
        unsigned long fault_status;
        phys_addr_t fault_ipa;
        struct kvm_memory_slot *memslot;
        bool is_iabt;
        gfn_t gfn;
        int ret, idx;

        is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
        fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);

        trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu),
                              kvm_vcpu_get_hfar(vcpu), fault_ipa);

        /* Check that the stage-2 fault is a translation or permission fault */
        fault_status = kvm_vcpu_trap_get_fault(vcpu);
        if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
                kvm_err("Unsupported fault status: EC=%#x DFCS=%#lx\n",
                        kvm_vcpu_trap_get_class(vcpu), fault_status);
                return -EFAULT;
        }

        idx = srcu_read_lock(&vcpu->kvm->srcu);

        gfn = fault_ipa >> PAGE_SHIFT;
        if (!kvm_is_visible_gfn(vcpu->kvm, gfn)) {
                if (is_iabt) {
                        /* Prefetch Abort on I/O address */
                        kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
                        ret = 1;
                        goto out_unlock;
                }

                if (fault_status != FSC_FAULT) {
                        kvm_err("Unsupported fault status on io memory: %#lx\n",
                                fault_status);
                        ret = -EFAULT;
                        goto out_unlock;
                }

                /*
                 * The IPA is reported as [MAX:12], so we need to
                 * complement it with the bottom 12 bits from the
                 * faulting VA. This is always 12 bits, irrespective
                 * of the page size.
                 */
                fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
                ret = io_mem_abort(vcpu, run, fault_ipa);
                goto out_unlock;
        }

        memslot = gfn_to_memslot(vcpu->kvm, gfn);

        ret = user_mem_abort(vcpu, fault_ipa, gfn, memslot, fault_status);
        if (ret == 0)
                ret = 1;
out_unlock:
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        return ret;
}

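/*
 * Iterate over every memslot that intersects the HVA range [start, end) and
 * invoke @handler once per guest physical page in the intersection.
 */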
static void handle_hva_to_gpa(struct kvm *kvm,
                              unsigned long start,
                              unsigned long end,
                              void (*handler)(struct kvm *kvm,
                                              gpa_t gpa, void *data),
                              void *data)
{
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;

        slots = kvm_memslots(kvm);

        /* we only care about the pages that the guest sees */
        kvm_for_each_memslot(memslot, slots) {
                unsigned long hva_start, hva_end;
                gfn_t gfn, gfn_end;

                hva_start = max(start, memslot->userspace_addr);
                hva_end = min(end, memslot->userspace_addr +
                                   (memslot->npages << PAGE_SHIFT));
                if (hva_start >= hva_end)
                        continue;

                /*
                 * {gfn(page) | page intersects with [hva_start, hva_end)} =
                 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
                 */
                gfn = hva_to_gfn_memslot(hva_start, memslot);
                gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);

                for (; gfn < gfn_end; ++gfn) {
                        gpa_t gpa = gfn << PAGE_SHIFT;
                        handler(kvm, gpa, data);
                }
        }
}

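/*
 * MMU notifier callbacks: when the host unmaps or changes a userspace page
 * backing guest memory, tear down (or update) the corresponding stage-2
 * mappings so the guest cannot keep using a stale translation.
 */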
static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
        unmap_stage2_range(kvm, gpa, PAGE_SIZE);
        kvm_tlb_flush_vmid(kvm);
}

int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
        unsigned long end = hva + PAGE_SIZE;

        if (!kvm->arch.pgd)
                return 0;

        trace_kvm_unmap_hva(hva);
        handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
        return 0;
}

int kvm_unmap_hva_range(struct kvm *kvm,
                        unsigned long start, unsigned long end)
{
        if (!kvm->arch.pgd)
                return 0;

        trace_kvm_unmap_hva_range(start, end);
        handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
        return 0;
}

static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
        pte_t *pte = (pte_t *)data;

        stage2_set_pte(kvm, NULL, gpa, pte, false);
}


void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
        unsigned long end = hva + PAGE_SIZE;
        pte_t stage2_pte;

        if (!kvm->arch.pgd)
                return;

        trace_kvm_set_spte_hva(hva);
        stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
        handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}

void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
        mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
}

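/*
 * Return the physical address of the Hyp-mode pgd, suitable for programming
 * into HTTBR when the hypervisor page tables are installed.
 */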
phys_addr_t kvm_mmu_get_httbr(void)
{
        VM_BUG_ON(!virt_addr_valid(hyp_pgd));
        return virt_to_phys(hyp_pgd);
}

int kvm_mmu_init(void)
{
        if (!hyp_pgd) {
                kvm_err("Hyp mode PGD not allocated\n");
                return -ENOMEM;
        }

        return 0;
}

/**
 * kvm_clear_hyp_idmap - remove all idmaps from the hyp pgd
 *
 * Free the underlying pmds for all pgds in range and clear the pgds (but
 * don't free them) afterwards.
 */
void kvm_clear_hyp_idmap(void)
{
        unsigned long addr, end;
        unsigned long next;
        pgd_t *pgd = hyp_pgd;
        pud_t *pud;
        pmd_t *pmd;

        addr = virt_to_phys(__hyp_idmap_text_start);
        end = virt_to_phys(__hyp_idmap_text_end);

        pgd += pgd_index(addr);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                pud = pud_offset(pgd, addr);
                pmd = pmd_offset(pud, addr);

                pud_clear(pud);
                kvm_clean_pmd_entry(pmd);
                pmd_free(NULL, (pmd_t *)((unsigned long)pmd & PAGE_MASK));
        } while (pgd++, addr = next, addr < end);
}