]>
Commit | Line | Data |
---|---|---|
926e5392 AV |
1 | /* |
2 | * Debug helper to dump the current kernel pagetables of the system | |
3 | * so that we can see what the various memory ranges are set to. | |
4 | * | |
5 | * (C) Copyright 2008 Intel Corporation | |
6 | * | |
7 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | ||
fe770bf0 PA |
15 | #include <linux/debugfs.h> |
16 | #include <linux/mm.h> | |
84e629b6 | 17 | #include <linux/init.h> |
146fbb76 | 18 | #include <linux/sched.h> |
926e5392 | 19 | #include <linux/seq_file.h> |
926e5392 AV |
20 | |
21 | #include <asm/pgtable.h> | |
22 | ||
23 | /* | |
24 | * The dumper groups pagetable entries of the same type into one, and for | |
25 | * that it needs to keep some state when walking, and flush this state | |
26 | * when a "break" in the continuity is found. | |
27 | */ | |
28 | struct pg_state { | |
29 | int level; | |
30 | pgprot_t current_prot; | |
31 | unsigned long start_address; | |
32 | unsigned long current_address; | |
fe770bf0 | 33 | const struct addr_marker *marker; |
3891a04a | 34 | unsigned long lines; |
ef6bea6d | 35 | bool to_dmesg; |
e1a58320 SS |
36 | bool check_wx; |
37 | unsigned long wx_pages; | |
926e5392 AV |
38 | }; |
39 | ||
/*
 * One entry of the address-space marker table: a named region of the
 * virtual address space that begins at start_address.
 */
struct addr_marker {
	/* First address that belongs to the region */
	unsigned long start_address;
	/* Label printed in the "---[ ... ]---" header line */
	const char *name;
	/* Upper bound on lines printed for the region; 0 means no limit */
	unsigned long max_lines;
};
45 | ||
/*
 * Indices into address_markers[].
 *
 * Every entry of address_markers[] needs a matching enumerator here, in
 * the same order and under the same #ifdef; a missing one makes all the
 * indices after it point at the wrong marker.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
#ifdef CONFIG_X86_64
	KERNEL_SPACE_NR,
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
# ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
# endif
# ifdef CONFIG_EFI
	/* Matches the "EFI Runtime Services" entry of address_markers[] */
	EFI_END_NR,
# endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
#else
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
# endif
	FIXADDR_START_NR,
#endif
};
70 | ||
fe770bf0 PA |
71 | /* Address space markers hints */ |
72 | static struct addr_marker address_markers[] = { | |
73 | { 0, "User Space" }, | |
74 | #ifdef CONFIG_X86_64 | |
75 | { 0x8000000000000000UL, "Kernel Space" }, | |
0483e1fa TG |
76 | { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, |
77 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
78 | { 0/* VMEMMAP_START */, "Vmemmap" }, | |
8a5a5d15 | 79 | # ifdef CONFIG_X86_ESPFIX64 |
3891a04a | 80 | { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, |
8266e31e MK |
81 | # endif |
82 | # ifdef CONFIG_EFI | |
83 | { EFI_VA_END, "EFI Runtime Services" }, | |
8a5a5d15 | 84 | # endif |
fe770bf0 | 85 | { __START_KERNEL_map, "High Kernel Mapping" }, |
9a79cf9c YL |
86 | { MODULES_VADDR, "Modules" }, |
87 | { MODULES_END, "End Modules" }, | |
fe770bf0 PA |
88 | #else |
89 | { PAGE_OFFSET, "Kernel Mapping" }, | |
90 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
91 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | |
92 | # ifdef CONFIG_HIGHMEM | |
173ae9ba | 93 | { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, |
fe770bf0 PA |
94 | # endif |
95 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | |
96 | #endif | |
97 | { -1, NULL } /* End of list */ | |
98 | }; | |
926e5392 | 99 | |
/*
 * Address stride of one entry at each pagetable level: multiplying a
 * table index by the level's multiplier gives the offset of the region
 * that entry covers.  Each level is the one below it times the number of
 * entries in the lower table.
 */
#define PTE_LEVEL_MULT	(PAGE_SIZE)
#define PMD_LEVEL_MULT	(PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT	(PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT	(PTRS_PER_PUD * PUD_LEVEL_MULT)
926e5392 | 105 | |
/*
 * Begin a line of output.  With to_dmesg set the text goes to the kernel
 * log at KERN_INFO; otherwise it goes to seq_file m, and nothing is
 * printed at all when m is NULL.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
114 | ||
/*
 * Continue the line that is currently being output.  With to_dmesg set
 * the text is appended to the kernel log with KERN_CONT; otherwise it
 * goes to seq_file m, and nothing is printed when m is NULL.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
123 | ||
926e5392 AV |
124 | /* |
125 | * Print a readable form of a pgprot_t to the seq_file | |
126 | */ | |
ef6bea6d | 127 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) |
926e5392 | 128 | { |
fe770bf0 PA |
129 | pgprotval_t pr = pgprot_val(prot); |
130 | static const char * const level_name[] = | |
131 | { "cr3", "pgd", "pud", "pmd", "pte" }; | |
132 | ||
133 | if (!pgprot_val(prot)) { | |
134 | /* Not present */ | |
f439c429 | 135 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
136 | } else { |
137 | if (pr & _PAGE_USER) | |
ef6bea6d | 138 | pt_dump_cont_printf(m, dmsg, "USR "); |
926e5392 | 139 | else |
ef6bea6d | 140 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 141 | if (pr & _PAGE_RW) |
ef6bea6d | 142 | pt_dump_cont_printf(m, dmsg, "RW "); |
fe770bf0 | 143 | else |
ef6bea6d | 144 | pt_dump_cont_printf(m, dmsg, "ro "); |
fe770bf0 | 145 | if (pr & _PAGE_PWT) |
ef6bea6d | 146 | pt_dump_cont_printf(m, dmsg, "PWT "); |
fe770bf0 | 147 | else |
ef6bea6d | 148 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 149 | if (pr & _PAGE_PCD) |
ef6bea6d | 150 | pt_dump_cont_printf(m, dmsg, "PCD "); |
926e5392 | 151 | else |
ef6bea6d | 152 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 153 | |
f439c429 JG |
154 | /* Bit 7 has a different meaning on level 3 vs 4 */ |
155 | if (level <= 3 && pr & _PAGE_PSE) | |
156 | pt_dump_cont_printf(m, dmsg, "PSE "); | |
157 | else | |
158 | pt_dump_cont_printf(m, dmsg, " "); | |
159 | if ((level == 4 && pr & _PAGE_PAT) || | |
160 | ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) | |
da25e628 | 161 | pt_dump_cont_printf(m, dmsg, "PAT "); |
f439c429 JG |
162 | else |
163 | pt_dump_cont_printf(m, dmsg, " "); | |
fe770bf0 | 164 | if (pr & _PAGE_GLOBAL) |
ef6bea6d | 165 | pt_dump_cont_printf(m, dmsg, "GLB "); |
fe770bf0 | 166 | else |
ef6bea6d | 167 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 168 | if (pr & _PAGE_NX) |
ef6bea6d | 169 | pt_dump_cont_printf(m, dmsg, "NX "); |
fe770bf0 | 170 | else |
ef6bea6d | 171 | pt_dump_cont_printf(m, dmsg, "x "); |
926e5392 | 172 | } |
ef6bea6d | 173 | pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); |
926e5392 AV |
174 | } |
175 | ||
/*
 * Bring an address computed by the walkers into its canonical form:
 * on 64 bits the 48 bit address is sign-extended to 64 bit, on 32 bits
 * the address is returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	/* Push bit 47 up into the sign bit, then shift it back down */
	signed long extended = (signed long)(u << 16);

	extended >>= 16;
	return extended;
#else
	return u;
#endif
}
187 | ||
188 | /* | |
189 | * This function gets called on a break in a continuous series | |
190 | * of PTE entries; the next one is different so we need to | |
191 | * print what we collected so far. | |
192 | */ | |
193 | static void note_page(struct seq_file *m, struct pg_state *st, | |
fe770bf0 | 194 | pgprot_t new_prot, int level) |
926e5392 | 195 | { |
fe770bf0 | 196 | pgprotval_t prot, cur; |
3891a04a | 197 | static const char units[] = "BKMGTPE"; |
926e5392 AV |
198 | |
199 | /* | |
200 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
201 | * we have now. "break" is either changing perms, levels or |
202 | * address space marker. | |
926e5392 | 203 | */ |
da25e628 TK |
204 | prot = pgprot_val(new_prot); |
205 | cur = pgprot_val(st->current_prot); | |
926e5392 | 206 | |
fe770bf0 PA |
207 | if (!st->level) { |
208 | /* First entry */ | |
209 | st->current_prot = new_prot; | |
210 | st->level = level; | |
211 | st->marker = address_markers; | |
3891a04a | 212 | st->lines = 0; |
ef6bea6d BP |
213 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
214 | st->marker->name); | |
fe770bf0 PA |
215 | } else if (prot != cur || level != st->level || |
216 | st->current_address >= st->marker[1].start_address) { | |
217 | const char *unit = units; | |
926e5392 | 218 | unsigned long delta; |
6424fb38 | 219 | int width = sizeof(unsigned long) * 2; |
e1a58320 SS |
220 | pgprotval_t pr = pgprot_val(st->current_prot); |
221 | ||
222 | if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) { | |
223 | WARN_ONCE(1, | |
224 | "x86/mm: Found insecure W+X mapping at address %p/%pS\n", | |
225 | (void *)st->start_address, | |
226 | (void *)st->start_address); | |
227 | st->wx_pages += (st->current_address - | |
228 | st->start_address) / PAGE_SIZE; | |
229 | } | |
926e5392 | 230 | |
926e5392 AV |
231 | /* |
232 | * Now print the actual finished series | |
233 | */ | |
3891a04a PA |
234 | if (!st->marker->max_lines || |
235 | st->lines < st->marker->max_lines) { | |
236 | pt_dump_seq_printf(m, st->to_dmesg, | |
237 | "0x%0*lx-0x%0*lx ", | |
238 | width, st->start_address, | |
239 | width, st->current_address); | |
926e5392 | 240 | |
3891a04a PA |
241 | delta = st->current_address - st->start_address; |
242 | while (!(delta & 1023) && unit[1]) { | |
243 | delta >>= 10; | |
244 | unit++; | |
245 | } | |
246 | pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", | |
247 | delta, *unit); | |
248 | printk_prot(m, st->current_prot, st->level, | |
249 | st->to_dmesg); | |
926e5392 | 250 | } |
3891a04a | 251 | st->lines++; |
fe770bf0 PA |
252 | |
253 | /* | |
254 | * We print markers for special areas of address space, | |
255 | * such as the start of vmalloc space etc. | |
256 | * This helps in the interpretation. | |
257 | */ | |
258 | if (st->current_address >= st->marker[1].start_address) { | |
3891a04a PA |
259 | if (st->marker->max_lines && |
260 | st->lines > st->marker->max_lines) { | |
261 | unsigned long nskip = | |
262 | st->lines - st->marker->max_lines; | |
263 | pt_dump_seq_printf(m, st->to_dmesg, | |
264 | "... %lu entr%s skipped ... \n", | |
265 | nskip, | |
266 | nskip == 1 ? "y" : "ies"); | |
267 | } | |
fe770bf0 | 268 | st->marker++; |
3891a04a | 269 | st->lines = 0; |
ef6bea6d BP |
270 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
271 | st->marker->name); | |
926e5392 | 272 | } |
fe770bf0 | 273 | |
926e5392 AV |
274 | st->start_address = st->current_address; |
275 | st->current_prot = new_prot; | |
276 | st->level = level; | |
fe770bf0 | 277 | } |
926e5392 AV |
278 | } |
279 | ||
fe770bf0 | 280 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, |
926e5392 AV |
281 | unsigned long P) |
282 | { | |
283 | int i; | |
284 | pte_t *start; | |
da25e628 | 285 | pgprotval_t prot; |
926e5392 AV |
286 | |
287 | start = (pte_t *) pmd_page_vaddr(addr); | |
288 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
da25e628 | 289 | prot = pte_flags(*start); |
fe770bf0 | 290 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
da25e628 | 291 | note_page(m, st, __pgprot(prot), 4); |
926e5392 AV |
292 | start++; |
293 | } | |
294 | } | |
295 | ||
#if PTRS_PER_PMD > 1

/*
 * Walk the PMD table that the PUD entry @addr points to.  Empty entries
 * are reported as not present, large or non-present entries are reported
 * with their own flags, and everything else is descended into.
 * @P is the not yet normalized address covered by the first entry.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
			   unsigned long P)
{
	pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++, pmd++) {
		unsigned long base = P + idx * PMD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
			continue;
		}

		if (pmd_large(*pmd) || !pmd_present(*pmd)) {
			pgprotval_t flags = pmd_flags(*pmd);

			note_page(m, st, __pgprot(flags), 3);
			continue;
		}

		walk_pte_level(m, st, *pmd, base);
	}
}

#else
/* The PMD level is folded: treat the PUD entry as if it were a PMD entry */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a)  pmd_none(__pmd(pud_val(a)))
#endif
926e5392 | 327 | |
#if PTRS_PER_PUD > 1

/*
 * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y.
 * KASAN fills page tables with the same values, and there is no point in
 * checking the same page table more than once, so an entry that equals
 * its predecessor is skipped while checking for W+X mappings.  This saves
 * dozens of seconds during boot.
 */
static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
{
	if (!checkwx || !prev_pud)
		return false;

	return pud_val(*prev_pud) == pud_val(*pud);
}

/*
 * Walk the PUD table that the PGD entry @addr points to.  Empty entries
 * and entries already checked are reported as not present, large or
 * non-present entries are reported with their own flags, and everything
 * else is descended into.
 * @P is the not yet normalized address covered by the first entry.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
			   unsigned long P)
{
	pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
	pud_t *prev = NULL;
	int idx;

	for (idx = 0; idx < PTRS_PER_PUD; idx++) {
		unsigned long base = P + idx * PUD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pud_none(*pud) ||
		    pud_already_checked(prev, pud, st->check_wx)) {
			note_page(m, st, __pgprot(0), 2);
		} else if (pud_large(*pud) || !pud_present(*pud)) {
			pgprotval_t flags = pud_flags(*pud);

			note_page(m, st, __pgprot(flags), 2);
		} else {
			walk_pmd_level(m, st, *pud, base);
		}

		prev = pud;
		pud++;
	}
}

#else
/* The PUD level is folded: treat the PGD entry as if it were a PUD entry */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a)  pud_none(__pud(pgd_val(a)))
#endif
375 | ||
/*
 * Tell whether top-level pagetable slot @idx lies in the range that is
 * reserved for the hypervisor.  Always false on 32 bits.
 */
static inline bool is_hypervisor_range(int idx)
{
#ifdef CONFIG_X86_64
	/*
	 * ffff800000000000 - ffff87ffffffffff is reserved for
	 * the hypervisor: the 16 slots right below __PAGE_OFFSET.
	 */
	if (idx < pgd_index(__PAGE_OFFSET) - 16)
		return false;

	return idx < pgd_index(__PAGE_OFFSET);
#else
	return false;
#endif
}
f4e342c8 | 389 | |
e1a58320 SS |
390 | static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, |
391 | bool checkwx) | |
926e5392 | 392 | { |
fe770bf0 | 393 | #ifdef CONFIG_X86_64 |
926e5392 | 394 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
fe770bf0 PA |
395 | #else |
396 | pgd_t *start = swapper_pg_dir; | |
397 | #endif | |
da25e628 | 398 | pgprotval_t prot; |
926e5392 | 399 | int i; |
ef6bea6d | 400 | struct pg_state st = {}; |
926e5392 | 401 | |
ef6bea6d BP |
402 | if (pgd) { |
403 | start = pgd; | |
404 | st.to_dmesg = true; | |
405 | } | |
926e5392 | 406 | |
e1a58320 SS |
407 | st.check_wx = checkwx; |
408 | if (checkwx) | |
409 | st.wx_pages = 0; | |
410 | ||
926e5392 | 411 | for (i = 0; i < PTRS_PER_PGD; i++) { |
fe770bf0 | 412 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
f4e342c8 | 413 | if (!pgd_none(*start) && !is_hypervisor_range(i)) { |
da25e628 TK |
414 | if (pgd_large(*start) || !pgd_present(*start)) { |
415 | prot = pgd_flags(*start); | |
fe770bf0 | 416 | note_page(m, &st, __pgprot(prot), 1); |
da25e628 | 417 | } else { |
fe770bf0 PA |
418 | walk_pud_level(m, &st, *start, |
419 | i * PGD_LEVEL_MULT); | |
da25e628 | 420 | } |
fe770bf0 | 421 | } else |
926e5392 | 422 | note_page(m, &st, __pgprot(0), 1); |
fe770bf0 | 423 | |
146fbb76 | 424 | cond_resched(); |
926e5392 AV |
425 | start++; |
426 | } | |
fe770bf0 PA |
427 | |
428 | /* Flush out the last page */ | |
429 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
430 | note_page(m, &st, __pgprot(0), 0); | |
e1a58320 SS |
431 | if (!checkwx) |
432 | return; | |
433 | if (st.wx_pages) | |
434 | pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n", | |
435 | st.wx_pages); | |
436 | else | |
437 | pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); | |
438 | } | |
439 | ||
440 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) | |
441 | { | |
442 | ptdump_walk_pgd_level_core(m, pgd, false); | |
926e5392 | 443 | } |
8609d1b5 | 444 | EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level); |
926e5392 | 445 | |
/*
 * Walk the kernel's own pagetables only to look for writable + executable
 * mappings; no dump is produced, just the warning and the summary line.
 */
void ptdump_walk_pgd_level_checkwx(void)
{
	ptdump_walk_pgd_level_core(NULL, NULL, true);
}
450 | ||
8609d1b5 | 451 | static int __init pt_dump_init(void) |
926e5392 | 452 | { |
0483e1fa TG |
453 | /* |
454 | * Various markers are not compile-time constants, so assign them | |
455 | * here. | |
456 | */ | |
457 | #ifdef CONFIG_X86_64 | |
458 | address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET; | |
459 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; | |
460 | address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; | |
461 | #endif | |
fe770bf0 | 462 | #ifdef CONFIG_X86_32 |
92851e2f AS |
463 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
464 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | |
fe770bf0 | 465 | # ifdef CONFIG_HIGHMEM |
92851e2f | 466 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
fe770bf0 | 467 | # endif |
92851e2f | 468 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; |
fe770bf0 PA |
469 | #endif |
470 | ||
926e5392 AV |
471 | return 0; |
472 | } | |
926e5392 | 473 | __initcall(pt_dump_init); |