// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains KASAN runtime code that manages shadow memory for
 * generic and software tag-based KASAN modes.
 *
 * Copyright (c) 2014 Samsung Electronics Co., Ltd.
 * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
 *
 * Some code borrowed from https://github.com/xairy/kasan-prototype by
 * Andrey Konovalov <andreyknvl@gmail.com>
 */

#include <linux/init.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/memory.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/vmalloc.h>

#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

#include "kasan.h"

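/*
 * Out-of-line access checks, exported for explicitly instrumented code
 * (e.g. the kasan_check_read()/kasan_check_write() helpers): validate the
 * range against the shadow and report on failure.
 */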
bool __kasan_check_read(const volatile void *p, unsigned int size)
{
        return check_memory_region((unsigned long)p, size, false, _RET_IP_);
}
EXPORT_SYMBOL(__kasan_check_read);

bool __kasan_check_write(const volatile void *p, unsigned int size)
{
        return check_memory_region((unsigned long)p, size, true, _RET_IP_);
}
EXPORT_SYMBOL(__kasan_check_write);

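/*
 * Interceptors for the string functions: check the full source and
 * destination ranges before handing off to the uninstrumented __memset(),
 * __memmove() and __memcpy(). If a check fails, the access is skipped and
 * NULL is returned.
 */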
#undef memset
void *memset(void *addr, int c, size_t len)
{
        if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_))
                return NULL;

        return __memset(addr, c, len);
}

#ifdef __HAVE_ARCH_MEMMOVE
#undef memmove
void *memmove(void *dest, const void *src, size_t len)
{
        if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
            !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
                return NULL;

        return __memmove(dest, src, len);
}
#endif

#undef memcpy
void *memcpy(void *dest, const void *src, size_t len)
{
        if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
            !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
                return NULL;

        return __memcpy(dest, src, len);
}

/*
 * Poisons the shadow memory for 'size' bytes starting from 'addr'.
 * Memory addresses should be aligned to KASAN_GRANULE_SIZE.
 */
void poison_range(const void *address, size_t size, u8 value)
{
        void *shadow_start, *shadow_end;

        /*
         * Perform shadow offset calculation based on untagged address, as
         * some of the callers (e.g. kasan_poison_object_data) pass tagged
         * addresses to this function.
         */
        address = kasan_reset_tag(address);
        size = round_up(size, KASAN_GRANULE_SIZE);

        shadow_start = kasan_mem_to_shadow(address);
        shadow_end = kasan_mem_to_shadow(address + size);

        __memset(shadow_start, value, shadow_end - shadow_start);
}

void unpoison_range(const void *address, size_t size)
{
        u8 tag = get_tag(address);

        /*
         * Perform shadow offset calculation based on untagged address, as
         * some of the callers (e.g. kasan_unpoison_object_data) pass tagged
         * addresses to this function.
         */
        address = kasan_reset_tag(address);

        poison_range(address, size, tag);

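        /*
         * The region may end partway through a granule. For that last
         * granule, software tag-based KASAN stores the pointer tag in the
         * shadow byte, while generic KASAN stores the number of accessible
         * bytes (size & KASAN_GRANULE_MASK).
         */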
        if (size & KASAN_GRANULE_MASK) {
                u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size);

                if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
                        *shadow = tag;
                else /* CONFIG_KASAN_GENERIC */
                        *shadow = size & KASAN_GRANULE_MASK;
        }
}

#ifdef CONFIG_MEMORY_HOTPLUG
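/*
 * Walk the kernel page tables to see whether a shadow page is already mapped
 * at 'addr'. A huge (bad) pud or pmd counts as mapped.
 */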
static bool shadow_mapped(unsigned long addr)
{
        pgd_t *pgd = pgd_offset_k(addr);
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        if (pgd_none(*pgd))
                return false;
        p4d = p4d_offset(pgd, addr);
        if (p4d_none(*p4d))
                return false;
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud))
                return false;

        /*
         * We can't use pud_large() or pud_huge() here: the former is
         * arch-specific and the latter depends on HUGETLB_PAGE. So we abuse
         * pud_bad() instead: if the pud is bad, it is bad because it is huge.
         */
        if (pud_bad(*pud))
                return true;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
                return false;

        if (pmd_bad(*pmd))
                return true;
        pte = pte_offset_kernel(pmd, addr);
        return !pte_none(*pte);
}

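/*
 * Memory hotplug notifier: allocate and map shadow for memory going online
 * with vmalloc, and free it again on offline, unless the shadow was already
 * mapped at boot.
 */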
static int __meminit kasan_mem_notifier(struct notifier_block *nb,
                        unsigned long action, void *data)
{
        struct memory_notify *mem_data = data;
        unsigned long nr_shadow_pages, start_kaddr, shadow_start;
        unsigned long shadow_end, shadow_size;

        nr_shadow_pages = mem_data->nr_pages >> KASAN_SHADOW_SCALE_SHIFT;
        start_kaddr = (unsigned long)pfn_to_kaddr(mem_data->start_pfn);
        shadow_start = (unsigned long)kasan_mem_to_shadow((void *)start_kaddr);
        shadow_size = nr_shadow_pages << PAGE_SHIFT;
        shadow_end = shadow_start + shadow_size;

        if (WARN_ON(mem_data->nr_pages % KASAN_GRANULE_SIZE) ||
                WARN_ON(start_kaddr % KASAN_MEMORY_PER_SHADOW_PAGE))
                return NOTIFY_BAD;

        switch (action) {
        case MEM_GOING_ONLINE: {
                void *ret;

                /*
                 * If the shadow is already mapped, it must have been mapped
                 * during boot. This can happen when onlining previously
                 * offlined memory.
                 */
                if (shadow_mapped(shadow_start))
                        return NOTIFY_OK;

                ret = __vmalloc_node_range(shadow_size, PAGE_SIZE, shadow_start,
                                        shadow_end, GFP_KERNEL,
                                        PAGE_KERNEL, VM_NO_GUARD,
                                        pfn_to_nid(mem_data->start_pfn),
                                        __builtin_return_address(0));
                if (!ret)
                        return NOTIFY_BAD;

                kmemleak_ignore(ret);
                return NOTIFY_OK;
        }
        case MEM_CANCEL_ONLINE:
        case MEM_OFFLINE: {
                struct vm_struct *vm;

                /*
                 * shadow_start was either mapped during boot by kasan_init()
                 * or during memory online by __vmalloc_node_range(). In the
                 * latter case we can use vfree() to free the shadow; a
                 * non-NULL result from find_vm_area() tells us that this was
                 * indeed the case.
                 *
                 * Shadow mapped during boot by kasan_init() currently cannot
                 * be freed, because the code to do that has not been written
                 * yet, so we simply leak the memory.
                 */
                vm = find_vm_area((void *)shadow_start);
                if (vm)
                        vfree((void *)shadow_start);
        }
        }

        return NOTIFY_OK;
}

static int __init kasan_memhotplug_init(void)
{
        hotplug_memory_notifier(kasan_mem_notifier, 0);

        return 0;
}

core_initcall(kasan_memhotplug_init);
#endif

#ifdef CONFIG_KASAN_VMALLOC

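/*
 * Lazily populate a single shadow PTE. The freshly allocated backing page is
 * pre-poisoned with KASAN_VMALLOC_INVALID, and pte_none() is re-checked under
 * init_mm.page_table_lock so that a racing CPU which installed the PTE first
 * wins and our page is freed.
 */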
static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
                                      void *unused)
{
        unsigned long page;
        pte_t pte;

        if (likely(!pte_none(*ptep)))
                return 0;

        page = __get_free_page(GFP_KERNEL);
        if (!page)
                return -ENOMEM;

        memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
        pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);

        spin_lock(&init_mm.page_table_lock);
        if (likely(pte_none(*ptep))) {
                set_pte_at(&init_mm, addr, ptep, pte);
                page = 0;
        }
        spin_unlock(&init_mm.page_table_lock);
        if (page)
                free_page(page);
        return 0;
}

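/*
 * Make sure backing pages exist for the shadow of the vmalloc-space range
 * [addr, addr + size); returns 0 for non-vmalloc addresses and an error if a
 * shadow page cannot be allocated.
 */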
int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
        unsigned long shadow_start, shadow_end;
        int ret;

        if (!is_vmalloc_or_module_addr((void *)addr))
                return 0;

        shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
        shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
        shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
        shadow_end = ALIGN(shadow_end, PAGE_SIZE);

        ret = apply_to_page_range(&init_mm, shadow_start,
                                  shadow_end - shadow_start,
                                  kasan_populate_vmalloc_pte, NULL);
        if (ret)
                return ret;

        flush_cache_vmap(shadow_start, shadow_end);

        /*
         * We need to be careful about inter-cpu effects here. Consider:
         *
         *   CPU#0                                CPU#1
         * WRITE_ONCE(p, vmalloc(100));           while (x = READ_ONCE(p)) ;
         *                                        p[99] = 1;
         *
         * With compiler instrumentation, that ends up looking like this:
         *
         *   CPU#0                                CPU#1
         * // vmalloc() allocates memory
         * // let a = area->addr
         * // we reach kasan_populate_vmalloc
         * // and call unpoison_range:
         * STORE shadow(a), unpoison_val
         * ...
         * STORE shadow(a+99), unpoison_val       x = LOAD p
         * // rest of vmalloc process             <data dependency>
         * STORE p, a                             LOAD shadow(x+99)
         *
         * If there is no barrier between the end of unpoisoning the shadow
         * and the store of the result to p, the stores could be committed
         * in a different order by CPU#0, and CPU#1 could erroneously observe
         * poison in the shadow.
         *
         * We need some sort of barrier between the stores.
         *
         * In the vmalloc() case, this is provided by a smp_wmb() in
         * clear_vm_uninitialized_flag(). In the per-cpu allocator and in
         * get_vm_area() and friends, the caller gets shadow allocated but
         * doesn't have any pages mapped into the virtual address space that
         * has been reserved. Mapping those pages in will involve taking and
         * releasing a page-table lock, which will provide the barrier.
         */

        return 0;
}

/*
 * Poison the shadow for a vmalloc region. Called as part of the
 * freeing process at the time the region is freed.
 */
void kasan_poison_vmalloc(const void *start, unsigned long size)
{
        if (!is_vmalloc_or_module_addr(start))
                return;

        size = round_up(size, KASAN_GRANULE_SIZE);
        poison_range(start, size, KASAN_VMALLOC_INVALID);
}

void kasan_unpoison_vmalloc(const void *start, unsigned long size)
{
        if (!is_vmalloc_or_module_addr(start))
                return;

        unpoison_range(start, size);
}

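/*
 * Counterpart of kasan_populate_vmalloc_pte(): clear a shadow PTE and free
 * its backing page, re-checking pte_none() under init_mm.page_table_lock to
 * avoid double-freeing against a concurrent depopulation.
 */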
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
                                        void *unused)
{
        unsigned long page;

        page = (unsigned long)__va(pte_pfn(*ptep) << PAGE_SHIFT);

        spin_lock(&init_mm.page_table_lock);

        if (likely(!pte_none(*ptep))) {
                pte_clear(&init_mm, addr, ptep);
                free_page(page);
        }
        spin_unlock(&init_mm.page_table_lock);

        return 0;
}

/*
 * Release the backing for the vmalloc region [start, end), which
 * lies within the free region [free_region_start, free_region_end).
 *
 * This can be run lazily, long after the region was freed. It runs
 * under vmap_area_lock, so it's not safe to interact with the vmalloc/vmap
 * infrastructure.
 *
 * How does this work?
 * -------------------
 *
 * We have a region that is page aligned, labelled as A.
 * That might not map onto the shadow in a way that is page-aligned:
 *
 *                    start                     end
 *                    v                         v
 * |????????|????????|AAAAAAAA|AA....AA|AAAAAAAA|????????| < vmalloc
 *  -------- -------- -------- -------- --------
 *      |        |       |         |       |
 *      |        |       |    /-------/    |
 *      \-------\|/------/     |/---------------/
 *              |||            ||
 *             |??AAAAAA|AAAAAAAA|AA??????| < shadow
 *                 (1)      (2)      (3)
 *
 * First we align the start upwards and the end downwards, so that the
 * shadow of the region aligns with shadow page boundaries. In the
 * example, this gives us the shadow page (2). This is the shadow entirely
 * covered by this allocation.
 *
 * Then we have the tricky bits. We want to know if we can free the
 * partially covered shadow pages - (1) and (3) in the example. For this,
 * we are given the start and end of the free region that contains this
 * allocation. Extending our previous example, we could have:
 *
 *  free_region_start                                   free_region_end
 *      |           start                       end          |
 *      v           v                           v            v
 * |FFFFFFFF|FFFFFFFF|AAAAAAAA|AA....AA|AAAAAAAA|FFFFFFFF| < vmalloc
 *  -------- -------- -------- -------- --------
 *      |        |       |         |       |
 *      |        |       |    /-------/    |
 *      \-------\|/------/     |/---------------/
 *              |||            ||
 *             |FFAAAAAA|AAAAAAAA|AAF?????| < shadow
 *                 (1)      (2)      (3)
 *
 * Once again, we align the start of the free region up, and the end of
 * the free region down so that the shadow is page aligned. So we can free
 * page (1) - we know no allocation currently uses anything in that page,
 * because all of it is in the vmalloc free region. But we cannot free
 * page (3), because we can't be sure that the rest of it is unused.
 *
 * We only consider pages that contain part of the original region for
 * freeing: we don't try to free other pages from the free region or we'd
 * end up trying to free huge chunks of virtual address space.
 *
 * Concurrency
 * -----------
 *
 * How do we know that we're not freeing a page that is simultaneously
 * being used for a fresh allocation in kasan_populate_vmalloc(_pte)?
 *
 * We _can_ have kasan_release_vmalloc and kasan_populate_vmalloc running
 * at the same time. While we run under free_vmap_area_lock, the population
 * code does not.
 *
 * free_vmap_area_lock instead operates to ensure that the larger range
 * [free_region_start, free_region_end) is safe: because __alloc_vmap_area and
 * the per-cpu region-finding algorithm both run under free_vmap_area_lock,
 * no space identified as free will become used while we are running. This
 * means that so long as we are careful with alignment and only free shadow
 * pages entirely covered by the free region, we will not run into any
 * trouble - any simultaneous allocations will be for disjoint regions.
 */
void kasan_release_vmalloc(unsigned long start, unsigned long end,
                           unsigned long free_region_start,
                           unsigned long free_region_end)
{
        void *shadow_start, *shadow_end;
        unsigned long region_start, region_end;
        unsigned long size;

        region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE);
        region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE);

        free_region_start = ALIGN(free_region_start, KASAN_MEMORY_PER_SHADOW_PAGE);

        if (start != region_start &&
            free_region_start < region_start)
                region_start -= KASAN_MEMORY_PER_SHADOW_PAGE;

        free_region_end = ALIGN_DOWN(free_region_end, KASAN_MEMORY_PER_SHADOW_PAGE);

        if (end != region_end &&
            free_region_end > region_end)
                region_end += KASAN_MEMORY_PER_SHADOW_PAGE;

        shadow_start = kasan_mem_to_shadow((void *)region_start);
        shadow_end = kasan_mem_to_shadow((void *)region_end);

        if (shadow_end > shadow_start) {
                size = shadow_end - shadow_start;
                apply_to_existing_page_range(&init_mm,
                                             (unsigned long)shadow_start,
                                             size, kasan_depopulate_vmalloc_pte,
                                             NULL);
                flush_tlb_kernel_range((unsigned long)shadow_start,
                                       (unsigned long)shadow_end);
        }
}

#else /* CONFIG_KASAN_VMALLOC */

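/*
 * Without CONFIG_KASAN_VMALLOC, module space gets its shadow mapped here when
 * a module is allocated, and kasan_free_shadow() tears it down again when the
 * module's vm area (marked VM_KASAN) is freed.
 */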
int kasan_module_alloc(void *addr, size_t size)
{
        void *ret;
        size_t scaled_size;
        size_t shadow_size;
        unsigned long shadow_start;

        shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
        scaled_size = (size + KASAN_GRANULE_SIZE - 1) >>
                        KASAN_SHADOW_SCALE_SHIFT;
        shadow_size = round_up(scaled_size, PAGE_SIZE);

        if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
                return -EINVAL;

        ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
                        shadow_start + shadow_size,
                        GFP_KERNEL,
                        PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
                        __builtin_return_address(0));

        if (ret) {
                __memset(ret, KASAN_SHADOW_INIT, shadow_size);
                find_vm_area(addr)->flags |= VM_KASAN;
                kmemleak_ignore(ret);
                return 0;
        }

        return -ENOMEM;
}

void kasan_free_shadow(const struct vm_struct *vm)
{
        if (vm->flags & VM_KASAN)
                vfree(kasan_mem_to_shadow(vm->addr));
}

#endif