]>
Commit | Line | Data |
---|---|---|
926e5392 AV |
1 | /* |
2 | * Debug helper to dump the current kernel pagetables of the system | |
3 | * so that we can see what the various memory ranges are set to. | |
4 | * | |
5 | * (C) Copyright 2008 Intel Corporation | |
6 | * | |
7 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | ||
fe770bf0 PA |
15 | #include <linux/debugfs.h> |
16 | #include <linux/mm.h> | |
926e5392 AV |
17 | #include <linux/module.h> |
18 | #include <linux/seq_file.h> | |
926e5392 AV |
19 | |
20 | #include <asm/pgtable.h> | |
21 | ||
22 | /* | |
23 | * The dumper groups pagetable entries of the same type into one, and for | |
24 | * that it needs to keep some state when walking, and flush this state | |
25 | * when a "break" in the continuity is found. | |
26 | */ | |
27 | struct pg_state { | |
28 | int level; | |
29 | pgprot_t current_prot; | |
30 | unsigned long start_address; | |
31 | unsigned long current_address; | |
fe770bf0 | 32 | const struct addr_marker *marker; |
3891a04a | 33 | unsigned long lines; |
ef6bea6d | 34 | bool to_dmesg; |
e1a58320 SS |
35 | bool check_wx; |
36 | unsigned long wx_pages; | |
926e5392 AV |
37 | }; |
38 | ||
/*
 * One labelled boundary in the address space.  A section starts at
 * start_address and extends up to the start_address of the next marker.
 */
struct addr_marker {
	/* Lowest address belonging to this section */
	unsigned long start_address;
	/* Human-readable section title printed as "---[ name ]---" */
	const char *name;
	/* Upper bound on lines printed for this section; 0 means unlimited */
	unsigned long max_lines;
};
44 | ||
/*
 * Indices into address_markers[].  Every entry of that table must have a
 * matching enumerator here, guarded by the same #ifdef, so that the
 * symbolic indices always name the intended slot.
 *
 * EFI_END_NR mirrors the CONFIG_EFI "EFI Runtime Services" entry; without
 * it every index following the EFI slot would be off by one on 64-bit
 * kernels built with CONFIG_EFI.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
#ifdef CONFIG_X86_64
	KERNEL_SPACE_NR,
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
# ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
# endif
# ifdef CONFIG_EFI
	EFI_END_NR,
# endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
#else
	KERNEL_SPACE_NR,
	VMALLOC_START_NR,
	VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
# endif
	FIXADDR_START_NR,
#endif
};
69 | ||
fe770bf0 PA |
70 | /* Address space markers hints */ |
71 | static struct addr_marker address_markers[] = { | |
72 | { 0, "User Space" }, | |
73 | #ifdef CONFIG_X86_64 | |
74 | { 0x8000000000000000UL, "Kernel Space" }, | |
684eb016 | 75 | { PAGE_OFFSET, "Low Kernel Mapping" }, |
fe770bf0 | 76 | { VMALLOC_START, "vmalloc() Area" }, |
fe770bf0 | 77 | { VMEMMAP_START, "Vmemmap" }, |
8a5a5d15 | 78 | # ifdef CONFIG_X86_ESPFIX64 |
3891a04a | 79 | { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, |
8266e31e MK |
80 | # endif |
81 | # ifdef CONFIG_EFI | |
82 | { EFI_VA_END, "EFI Runtime Services" }, | |
8a5a5d15 | 83 | # endif |
fe770bf0 | 84 | { __START_KERNEL_map, "High Kernel Mapping" }, |
9a79cf9c YL |
85 | { MODULES_VADDR, "Modules" }, |
86 | { MODULES_END, "End Modules" }, | |
fe770bf0 PA |
87 | #else |
88 | { PAGE_OFFSET, "Kernel Mapping" }, | |
89 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
90 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | |
91 | # ifdef CONFIG_HIGHMEM | |
173ae9ba | 92 | { 0/*PKMAP_BASE*/, "Persistent kmap() Area" }, |
fe770bf0 PA |
93 | # endif |
94 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | |
95 | #endif | |
96 | { -1, NULL } /* End of list */ | |
97 | }; | |
926e5392 | 98 | |
/*
 * Number of bytes of virtual address space spanned by a single entry at
 * each page-table level.  Each level covers as much as a full table of the
 * level below it.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
926e5392 | 104 | |
/*
 * Start a new output line.  When to_dmesg is set the text goes to the
 * kernel log at KERN_INFO; otherwise it goes to the seq_file m, and is
 * silently dropped if m is NULL.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})

/*
 * Continue the current output line.  Same routing as pt_dump_seq_printf(),
 * except that the kernel-log variant uses KERN_CONT.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
122 | ||
926e5392 AV |
123 | /* |
124 | * Print a readable form of a pgprot_t to the seq_file | |
125 | */ | |
ef6bea6d | 126 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) |
926e5392 | 127 | { |
fe770bf0 PA |
128 | pgprotval_t pr = pgprot_val(prot); |
129 | static const char * const level_name[] = | |
130 | { "cr3", "pgd", "pud", "pmd", "pte" }; | |
131 | ||
132 | if (!pgprot_val(prot)) { | |
133 | /* Not present */ | |
f439c429 | 134 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
135 | } else { |
136 | if (pr & _PAGE_USER) | |
ef6bea6d | 137 | pt_dump_cont_printf(m, dmsg, "USR "); |
926e5392 | 138 | else |
ef6bea6d | 139 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 140 | if (pr & _PAGE_RW) |
ef6bea6d | 141 | pt_dump_cont_printf(m, dmsg, "RW "); |
fe770bf0 | 142 | else |
ef6bea6d | 143 | pt_dump_cont_printf(m, dmsg, "ro "); |
fe770bf0 | 144 | if (pr & _PAGE_PWT) |
ef6bea6d | 145 | pt_dump_cont_printf(m, dmsg, "PWT "); |
fe770bf0 | 146 | else |
ef6bea6d | 147 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 148 | if (pr & _PAGE_PCD) |
ef6bea6d | 149 | pt_dump_cont_printf(m, dmsg, "PCD "); |
926e5392 | 150 | else |
ef6bea6d | 151 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 152 | |
f439c429 JG |
153 | /* Bit 7 has a different meaning on level 3 vs 4 */ |
154 | if (level <= 3 && pr & _PAGE_PSE) | |
155 | pt_dump_cont_printf(m, dmsg, "PSE "); | |
156 | else | |
157 | pt_dump_cont_printf(m, dmsg, " "); | |
158 | if ((level == 4 && pr & _PAGE_PAT) || | |
159 | ((level == 3 || level == 2) && pr & _PAGE_PAT_LARGE)) | |
da25e628 | 160 | pt_dump_cont_printf(m, dmsg, "PAT "); |
f439c429 JG |
161 | else |
162 | pt_dump_cont_printf(m, dmsg, " "); | |
fe770bf0 | 163 | if (pr & _PAGE_GLOBAL) |
ef6bea6d | 164 | pt_dump_cont_printf(m, dmsg, "GLB "); |
fe770bf0 | 165 | else |
ef6bea6d | 166 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 167 | if (pr & _PAGE_NX) |
ef6bea6d | 168 | pt_dump_cont_printf(m, dmsg, "NX "); |
fe770bf0 | 169 | else |
ef6bea6d | 170 | pt_dump_cont_printf(m, dmsg, "x "); |
926e5392 | 171 | } |
ef6bea6d | 172 | pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); |
926e5392 AV |
173 | } |
174 | ||
/*
 * Turn a linear table offset into a canonical virtual address.  On 64-bit
 * the top 16 bits are replaced by copies of bit 47 (sign extension of the
 * 48-bit address); on 32-bit the value is returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	const signed long shifted = (signed long)(u << 16);

	return shifted >> 16;
#else
	return u;
#endif
}
186 | ||
187 | /* | |
188 | * This function gets called on a break in a continuous series | |
189 | * of PTE entries; the next one is different so we need to | |
190 | * print what we collected so far. | |
191 | */ | |
192 | static void note_page(struct seq_file *m, struct pg_state *st, | |
fe770bf0 | 193 | pgprot_t new_prot, int level) |
926e5392 | 194 | { |
fe770bf0 | 195 | pgprotval_t prot, cur; |
3891a04a | 196 | static const char units[] = "BKMGTPE"; |
926e5392 AV |
197 | |
198 | /* | |
199 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
200 | * we have now. "break" is either changing perms, levels or |
201 | * address space marker. | |
926e5392 | 202 | */ |
da25e628 TK |
203 | prot = pgprot_val(new_prot); |
204 | cur = pgprot_val(st->current_prot); | |
926e5392 | 205 | |
fe770bf0 PA |
206 | if (!st->level) { |
207 | /* First entry */ | |
208 | st->current_prot = new_prot; | |
209 | st->level = level; | |
210 | st->marker = address_markers; | |
3891a04a | 211 | st->lines = 0; |
ef6bea6d BP |
212 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
213 | st->marker->name); | |
fe770bf0 PA |
214 | } else if (prot != cur || level != st->level || |
215 | st->current_address >= st->marker[1].start_address) { | |
216 | const char *unit = units; | |
926e5392 | 217 | unsigned long delta; |
6424fb38 | 218 | int width = sizeof(unsigned long) * 2; |
e1a58320 SS |
219 | pgprotval_t pr = pgprot_val(st->current_prot); |
220 | ||
221 | if (st->check_wx && (pr & _PAGE_RW) && !(pr & _PAGE_NX)) { | |
222 | WARN_ONCE(1, | |
223 | "x86/mm: Found insecure W+X mapping at address %p/%pS\n", | |
224 | (void *)st->start_address, | |
225 | (void *)st->start_address); | |
226 | st->wx_pages += (st->current_address - | |
227 | st->start_address) / PAGE_SIZE; | |
228 | } | |
926e5392 | 229 | |
926e5392 AV |
230 | /* |
231 | * Now print the actual finished series | |
232 | */ | |
3891a04a PA |
233 | if (!st->marker->max_lines || |
234 | st->lines < st->marker->max_lines) { | |
235 | pt_dump_seq_printf(m, st->to_dmesg, | |
236 | "0x%0*lx-0x%0*lx ", | |
237 | width, st->start_address, | |
238 | width, st->current_address); | |
926e5392 | 239 | |
3891a04a PA |
240 | delta = st->current_address - st->start_address; |
241 | while (!(delta & 1023) && unit[1]) { | |
242 | delta >>= 10; | |
243 | unit++; | |
244 | } | |
245 | pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", | |
246 | delta, *unit); | |
247 | printk_prot(m, st->current_prot, st->level, | |
248 | st->to_dmesg); | |
926e5392 | 249 | } |
3891a04a | 250 | st->lines++; |
fe770bf0 PA |
251 | |
252 | /* | |
253 | * We print markers for special areas of address space, | |
254 | * such as the start of vmalloc space etc. | |
255 | * This helps in the interpretation. | |
256 | */ | |
257 | if (st->current_address >= st->marker[1].start_address) { | |
3891a04a PA |
258 | if (st->marker->max_lines && |
259 | st->lines > st->marker->max_lines) { | |
260 | unsigned long nskip = | |
261 | st->lines - st->marker->max_lines; | |
262 | pt_dump_seq_printf(m, st->to_dmesg, | |
263 | "... %lu entr%s skipped ... \n", | |
264 | nskip, | |
265 | nskip == 1 ? "y" : "ies"); | |
266 | } | |
fe770bf0 | 267 | st->marker++; |
3891a04a | 268 | st->lines = 0; |
ef6bea6d BP |
269 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
270 | st->marker->name); | |
926e5392 | 271 | } |
fe770bf0 | 272 | |
926e5392 AV |
273 | st->start_address = st->current_address; |
274 | st->current_prot = new_prot; | |
275 | st->level = level; | |
fe770bf0 | 276 | } |
926e5392 AV |
277 | } |
278 | ||
fe770bf0 | 279 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, |
926e5392 AV |
280 | unsigned long P) |
281 | { | |
282 | int i; | |
283 | pte_t *start; | |
da25e628 | 284 | pgprotval_t prot; |
926e5392 AV |
285 | |
286 | start = (pte_t *) pmd_page_vaddr(addr); | |
287 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
da25e628 | 288 | prot = pte_flags(*start); |
fe770bf0 | 289 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
da25e628 | 290 | note_page(m, st, __pgprot(prot), 4); |
926e5392 AV |
291 | start++; |
292 | } | |
293 | } | |
294 | ||
#if PTRS_PER_PMD > 1

/*
 * Visit every PMD of the table referenced by a PUD entry.  Empty slots and
 * large or non-present entries are reported directly as level-3 entries;
 * anything else is descended into.
 *
 * @m:    seq_file for output (may be NULL)
 * @st:   walker state
 * @addr: PUD entry pointing at the PMD table
 * @P:    un-normalized address covered by the first PMD
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
			   unsigned long P)
{
	pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++, pmd++) {
		st->current_address = normalize_addr(P + idx * PMD_LEVEL_MULT);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
			continue;
		}

		if (pmd_large(*pmd) || !pmd_present(*pmd)) {
			pgprotval_t flags = pmd_flags(*pmd);

			note_page(m, st, __pgprot(flags), 3);
		} else {
			walk_pte_level(m, st, *pmd,
				       P + idx * PMD_LEVEL_MULT);
		}
	}
}

#else
/* Only one PMD per table: treat the PUD entry itself as the PMD entry */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
#endif
926e5392 | 326 | |
#if PTRS_PER_PUD > 1

/*
 * Visit every PUD of the table referenced by a PGD entry.  Empty slots and
 * large or non-present entries are reported directly as level-2 entries;
 * anything else is descended into.
 *
 * @m:    seq_file for output (may be NULL)
 * @st:   walker state
 * @addr: PGD entry pointing at the PUD table
 * @P:    un-normalized address covered by the first PUD
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
			   unsigned long P)
{
	pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PUD; idx++, pud++) {
		st->current_address = normalize_addr(P + idx * PUD_LEVEL_MULT);

		if (pud_none(*pud)) {
			note_page(m, st, __pgprot(0), 2);
			continue;
		}

		if (pud_large(*pud) || !pud_present(*pud)) {
			pgprotval_t flags = pud_flags(*pud);

			note_page(m, st, __pgprot(flags), 2);
		} else {
			walk_pmd_level(m, st, *pud,
				       P + idx * PUD_LEVEL_MULT);
		}
	}
}

#else
/* Only one PUD per table: treat the PGD entry itself as the PUD entry */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
360 | ||
#ifdef CONFIG_X86_64
/*
 * Tell whether top-level slot @idx lies in the range reserved for the
 * hypervisor (ffff800000000000 - ffff87ffffffffff), i.e. the 16 slots
 * immediately below the slot of __PAGE_OFFSET.  Always false unless
 * paravirt_enabled().
 */
static inline bool is_hypervisor_range(int idx)
{
	if (!paravirt_enabled())
		return false;
	if (idx < pgd_index(__PAGE_OFFSET) - 16)
		return false;
	return idx < pgd_index(__PAGE_OFFSET);
}
#else
/* No hypervisor-reserved range is considered on 32-bit */
static inline bool is_hypervisor_range(int idx) { return false; }
#endif
375 | ||
e1a58320 SS |
376 | static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, |
377 | bool checkwx) | |
926e5392 | 378 | { |
fe770bf0 | 379 | #ifdef CONFIG_X86_64 |
926e5392 | 380 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
fe770bf0 PA |
381 | #else |
382 | pgd_t *start = swapper_pg_dir; | |
383 | #endif | |
da25e628 | 384 | pgprotval_t prot; |
926e5392 | 385 | int i; |
ef6bea6d | 386 | struct pg_state st = {}; |
926e5392 | 387 | |
ef6bea6d BP |
388 | if (pgd) { |
389 | start = pgd; | |
390 | st.to_dmesg = true; | |
391 | } | |
926e5392 | 392 | |
e1a58320 SS |
393 | st.check_wx = checkwx; |
394 | if (checkwx) | |
395 | st.wx_pages = 0; | |
396 | ||
926e5392 | 397 | for (i = 0; i < PTRS_PER_PGD; i++) { |
fe770bf0 | 398 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
f4e342c8 | 399 | if (!pgd_none(*start) && !is_hypervisor_range(i)) { |
da25e628 TK |
400 | if (pgd_large(*start) || !pgd_present(*start)) { |
401 | prot = pgd_flags(*start); | |
fe770bf0 | 402 | note_page(m, &st, __pgprot(prot), 1); |
da25e628 | 403 | } else { |
fe770bf0 PA |
404 | walk_pud_level(m, &st, *start, |
405 | i * PGD_LEVEL_MULT); | |
da25e628 | 406 | } |
fe770bf0 | 407 | } else |
926e5392 | 408 | note_page(m, &st, __pgprot(0), 1); |
fe770bf0 | 409 | |
926e5392 AV |
410 | start++; |
411 | } | |
fe770bf0 PA |
412 | |
413 | /* Flush out the last page */ | |
414 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
415 | note_page(m, &st, __pgprot(0), 0); | |
e1a58320 SS |
416 | if (!checkwx) |
417 | return; | |
418 | if (st.wx_pages) | |
419 | pr_info("x86/mm: Checked W+X mappings: FAILED, %lu W+X pages found.\n", | |
420 | st.wx_pages); | |
421 | else | |
422 | pr_info("x86/mm: Checked W+X mappings: passed, no W+X pages found.\n"); | |
423 | } | |
424 | ||
425 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) | |
426 | { | |
427 | ptdump_walk_pgd_level_core(m, pgd, false); | |
926e5392 | 428 | } |
8609d1b5 | 429 | EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level); |
926e5392 | 430 | |
/*
 * Walk the kernel's own page table purely to check for W+X mappings.
 * No seq_file and no explicit table are passed, so the per-range dump
 * lines are dropped; W+X warnings and the summary still reach the log.
 */
void ptdump_walk_pgd_level_checkwx(void)
{
	const bool check_wx = true;

	ptdump_walk_pgd_level_core(NULL, NULL, check_wx);
}
435 | ||
8609d1b5 | 436 | static int __init pt_dump_init(void) |
926e5392 | 437 | { |
fe770bf0 PA |
438 | #ifdef CONFIG_X86_32 |
439 | /* Not a compile-time constant on x86-32 */ | |
92851e2f AS |
440 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
441 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | |
fe770bf0 | 442 | # ifdef CONFIG_HIGHMEM |
92851e2f | 443 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
fe770bf0 | 444 | # endif |
92851e2f | 445 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; |
fe770bf0 PA |
446 | #endif |
447 | ||
926e5392 AV |
448 | return 0; |
449 | } | |
450 | ||
451 | __initcall(pt_dump_init); | |
452 | MODULE_LICENSE("GPL"); | |
453 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | |
454 | MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); |