]>
Commit | Line | Data |
---|---|---|
aa8c6248 TG |
1 | /* |
2 | * Copyright(c) 2017 Intel Corporation. All rights reserved. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of version 2 of the GNU General Public License as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * This program is distributed in the hope that it will be useful, but | |
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * General Public License for more details. | |
12 | * | |
13 | * This code is based in part on work published here: | |
14 | * | |
15 | * https://github.com/IAIK/KAISER | |
16 | * | |
17 | * The original work was written by, and signed off for the Linux | |
18 | * kernel by: | |
19 | * | |
20 | * Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at> | |
21 | * Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at> | |
22 | * Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at> | |
23 | * Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at> | |
24 | * | |
25 | * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com> | |
26 | * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and | |
27 | * Andy Lutomirsky <luto@amacapital.net> | |
28 | */ | |
29 | #include <linux/kernel.h> | |
30 | #include <linux/errno.h> | |
31 | #include <linux/string.h> | |
32 | #include <linux/types.h> | |
33 | #include <linux/bug.h> | |
34 | #include <linux/init.h> | |
35 | #include <linux/spinlock.h> | |
36 | #include <linux/mm.h> | |
37 | #include <linux/uaccess.h> | |
38 | ||
39 | #include <asm/cpufeature.h> | |
40 | #include <asm/hypervisor.h> | |
41 | #include <asm/cmdline.h> | |
42 | #include <asm/pti.h> | |
43 | #include <asm/pgtable.h> | |
44 | #include <asm/pgalloc.h> | |
45 | #include <asm/tlbflush.h> | |
46 | #include <asm/desc.h> | |
47 | ||
48 | #undef pr_fmt | |
49 | #define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt | |
50 | ||
03f4424f AL |
51 | /* Backporting helper */ |
52 | #ifndef __GFP_NOTRACK | |
53 | #define __GFP_NOTRACK 0 | |
54 | #endif | |
55 | ||
aa8c6248 TG |
56 | static void __init pti_print_if_insecure(const char *reason) |
57 | { | |
58 | if (boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) | |
59 | pr_info("%s\n", reason); | |
60 | } | |
61 | ||
41f4c20b BP |
62 | static void __init pti_print_if_secure(const char *reason) |
63 | { | |
64 | if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) | |
65 | pr_info("%s\n", reason); | |
66 | } | |
67 | ||
aa8c6248 TG |
68 | void __init pti_check_boottime_disable(void) |
69 | { | |
41f4c20b BP |
70 | char arg[5]; |
71 | int ret; | |
72 | ||
aa8c6248 TG |
73 | if (hypervisor_is_type(X86_HYPER_XEN_PV)) { |
74 | pti_print_if_insecure("disabled on XEN PV."); | |
75 | return; | |
76 | } | |
77 | ||
41f4c20b BP |
78 | ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg)); |
79 | if (ret > 0) { | |
80 | if (ret == 3 && !strncmp(arg, "off", 3)) { | |
81 | pti_print_if_insecure("disabled on command line."); | |
82 | return; | |
83 | } | |
84 | if (ret == 2 && !strncmp(arg, "on", 2)) { | |
85 | pti_print_if_secure("force enabled on command line."); | |
86 | goto enable; | |
87 | } | |
88 | if (ret == 4 && !strncmp(arg, "auto", 4)) | |
89 | goto autosel; | |
90 | } | |
91 | ||
aa8c6248 TG |
92 | if (cmdline_find_option_bool(boot_command_line, "nopti")) { |
93 | pti_print_if_insecure("disabled on command line."); | |
94 | return; | |
95 | } | |
96 | ||
41f4c20b | 97 | autosel: |
aa8c6248 TG |
98 | if (!boot_cpu_has_bug(X86_BUG_CPU_INSECURE)) |
99 | return; | |
41f4c20b | 100 | enable: |
aa8c6248 TG |
101 | setup_force_cpu_cap(X86_FEATURE_PTI); |
102 | } | |
103 | ||
61e9b367 DH |
104 | pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) |
105 | { | |
106 | /* | |
107 | * Changes to the high (kernel) portion of the kernelmode page | |
108 | * tables are not automatically propagated to the usermode tables. | |
109 | * | |
110 | * Users should keep in mind that, unlike the kernelmode tables, | |
111 | * there is no vmalloc_fault equivalent for the usermode tables. | |
112 | * Top-level entries added to init_mm's usermode pgd after boot | |
113 | * will not be automatically propagated to other mms. | |
114 | */ | |
115 | if (!pgdp_maps_userspace(pgdp)) | |
116 | return pgd; | |
117 | ||
118 | /* | |
119 | * The user page tables get the full PGD, accessible from | |
120 | * userspace: | |
121 | */ | |
122 | kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; | |
123 | ||
124 | /* | |
125 | * If this is normal user memory, make it NX in the kernel | |
126 | * pagetables so that, if we somehow screw up and return to | |
127 | * usermode with the kernel CR3 loaded, we'll get a page fault | |
128 | * instead of allowing user code to execute with the wrong CR3. | |
129 | * | |
130 | * As exceptions, we don't set NX if: | |
131 | * - _PAGE_USER is not set. This could be an executable | |
132 | * EFI runtime mapping or something similar, and the kernel | |
133 | * may execute from it | |
134 | * - we don't have NX support | |
135 | * - we're clearing the PGD (i.e. the new pgd is not present). | |
136 | */ | |
137 | if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && | |
138 | (__supported_pte_mask & _PAGE_NX)) | |
139 | pgd.pgd |= _PAGE_NX; | |
140 | ||
141 | /* return the copy of the PGD we want the kernel to use: */ | |
142 | return pgd; | |
143 | } | |
144 | ||
03f4424f AL |
145 | /* |
146 | * Walk the user copy of the page tables (optionally) trying to allocate | |
147 | * page table pages on the way down. | |
148 | * | |
149 | * Returns a pointer to a P4D on success, or NULL on failure. | |
150 | */ | |
151 | static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address) | |
152 | { | |
153 | pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address)); | |
154 | gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); | |
155 | ||
156 | if (address < PAGE_OFFSET) { | |
157 | WARN_ONCE(1, "attempt to walk user address\n"); | |
158 | return NULL; | |
159 | } | |
160 | ||
161 | if (pgd_none(*pgd)) { | |
162 | unsigned long new_p4d_page = __get_free_page(gfp); | |
163 | if (!new_p4d_page) | |
164 | return NULL; | |
165 | ||
166 | if (pgd_none(*pgd)) { | |
167 | set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); | |
168 | new_p4d_page = 0; | |
169 | } | |
170 | if (new_p4d_page) | |
171 | free_page(new_p4d_page); | |
172 | } | |
173 | BUILD_BUG_ON(pgd_large(*pgd) != 0); | |
174 | ||
175 | return p4d_offset(pgd, address); | |
176 | } | |
177 | ||
178 | /* | |
179 | * Walk the user copy of the page tables (optionally) trying to allocate | |
180 | * page table pages on the way down. | |
181 | * | |
182 | * Returns a pointer to a PMD on success, or NULL on failure. | |
183 | */ | |
184 | static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) | |
185 | { | |
186 | gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); | |
187 | p4d_t *p4d = pti_user_pagetable_walk_p4d(address); | |
188 | pud_t *pud; | |
189 | ||
190 | BUILD_BUG_ON(p4d_large(*p4d) != 0); | |
191 | if (p4d_none(*p4d)) { | |
192 | unsigned long new_pud_page = __get_free_page(gfp); | |
193 | if (!new_pud_page) | |
194 | return NULL; | |
195 | ||
196 | if (p4d_none(*p4d)) { | |
197 | set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page))); | |
198 | new_pud_page = 0; | |
199 | } | |
200 | if (new_pud_page) | |
201 | free_page(new_pud_page); | |
202 | } | |
203 | ||
204 | pud = pud_offset(p4d, address); | |
205 | /* The user page tables do not use large mappings: */ | |
206 | if (pud_large(*pud)) { | |
207 | WARN_ON(1); | |
208 | return NULL; | |
209 | } | |
210 | if (pud_none(*pud)) { | |
211 | unsigned long new_pmd_page = __get_free_page(gfp); | |
212 | if (!new_pmd_page) | |
213 | return NULL; | |
214 | ||
215 | if (pud_none(*pud)) { | |
216 | set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page))); | |
217 | new_pmd_page = 0; | |
218 | } | |
219 | if (new_pmd_page) | |
220 | free_page(new_pmd_page); | |
221 | } | |
222 | ||
223 | return pmd_offset(pud, address); | |
224 | } | |
225 | ||
226 | static void __init | |
227 | pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear) | |
228 | { | |
229 | unsigned long addr; | |
230 | ||
231 | /* | |
232 | * Clone the populated PMDs which cover start to end. These PMD areas | |
233 | * can have holes. | |
234 | */ | |
235 | for (addr = start; addr < end; addr += PMD_SIZE) { | |
236 | pmd_t *pmd, *target_pmd; | |
237 | pgd_t *pgd; | |
238 | p4d_t *p4d; | |
239 | pud_t *pud; | |
240 | ||
241 | pgd = pgd_offset_k(addr); | |
242 | if (WARN_ON(pgd_none(*pgd))) | |
243 | return; | |
244 | p4d = p4d_offset(pgd, addr); | |
245 | if (WARN_ON(p4d_none(*p4d))) | |
246 | return; | |
247 | pud = pud_offset(p4d, addr); | |
248 | if (pud_none(*pud)) | |
249 | continue; | |
250 | pmd = pmd_offset(pud, addr); | |
251 | if (pmd_none(*pmd)) | |
252 | continue; | |
253 | ||
254 | target_pmd = pti_user_pagetable_walk_pmd(addr); | |
255 | if (WARN_ON(!target_pmd)) | |
256 | return; | |
257 | ||
258 | /* | |
259 | * Copy the PMD. That is, the kernelmode and usermode | |
260 | * tables will share the last-level page tables of this | |
261 | * address range | |
262 | */ | |
263 | *target_pmd = pmd_clear_flags(*pmd, clear); | |
264 | } | |
265 | } | |
266 | ||
aa8c6248 TG |
267 | /* |
268 | * Initialize kernel page table isolation | |
269 | */ | |
270 | void __init pti_init(void) | |
271 | { | |
272 | if (!static_cpu_has(X86_FEATURE_PTI)) | |
273 | return; | |
274 | ||
275 | pr_info("enabled\n"); | |
276 | } |