]>
Commit | Line | Data |
---|---|---|
59d5af67 | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
321d628a FG |
2 | From: Dave Hansen <dave.hansen@linux.intel.com> |
3 | Date: Mon, 4 Dec 2017 15:07:37 +0100 | |
59d5af67 | 4 | Subject: [PATCH] x86/mm/pti: Add mapping helper functions |
321d628a FG |
5 | MIME-Version: 1.0 |
6 | Content-Type: text/plain; charset=UTF-8 | |
7 | Content-Transfer-Encoding: 8bit | |
8 | ||
9 | CVE-2017-5754 | |
10 | ||
11 | Add the pagetable helper functions to manage the separate user space page | |
12 | tables. | |
13 | ||
14 | [ tglx: Split out from the big combo kaiser patch. Folded Andys | |
15 | simplification and made it out of line as Boris suggested ] | |
16 | ||
17 | Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> | |
18 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> | |
19 | Cc: Andy Lutomirski <luto@kernel.org> | |
20 | Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> | |
21 | Cc: Borislav Petkov <bp@alien8.de> | |
22 | Cc: Brian Gerst <brgerst@gmail.com> | |
23 | Cc: David Laight <David.Laight@aculab.com> | |
24 | Cc: Denys Vlasenko <dvlasenk@redhat.com> | |
25 | Cc: Eduardo Valentin <eduval@amazon.com> | |
26 | Cc: Greg KH <gregkh@linuxfoundation.org> | |
27 | Cc: H. Peter Anvin <hpa@zytor.com> | |
28 | Cc: Josh Poimboeuf <jpoimboe@redhat.com> | |
29 | Cc: Juergen Gross <jgross@suse.com> | |
30 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
31 | Cc: Peter Zijlstra <peterz@infradead.org> | |
32 | Cc: Will Deacon <will.deacon@arm.com> | |
33 | Cc: aliguori@amazon.com | |
34 | Cc: daniel.gruss@iaik.tugraz.at | |
35 | Cc: hughd@google.com | |
36 | Cc: keescook@google.com | |
37 | Cc: linux-kernel@vger.kernel.org | |
38 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
39 | (cherry picked from commit 61e9b3671007a5da8127955a1a3bda7e0d5f42e8) | |
40 | Signed-off-by: Andy Whitcroft <apw@canonical.com> | |
41 | Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> | |
42 | (cherry picked from commit fb45c59197f3134db6e223bb4fec0529774c07e1) | |
43 | Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> | |
44 | --- | |
45 | arch/x86/include/asm/pgtable.h | 6 ++- | |
46 | arch/x86/include/asm/pgtable_64.h | 92 +++++++++++++++++++++++++++++++++++++++ | |
47 | arch/x86/mm/pti.c | 41 +++++++++++++++++ | |
48 | 3 files changed, 138 insertions(+), 1 deletion(-) | |
49 | ||
50 | diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h | |
51 | index bb8e9ea7deb4..abbb47c75467 100644 | |
52 | --- a/arch/x86/include/asm/pgtable.h | |
53 | +++ b/arch/x86/include/asm/pgtable.h | |
54 | @@ -894,7 +894,11 @@ static inline int pgd_none(pgd_t pgd) | |
55 | * pgd_offset() returns a (pgd_t *) | |
56 | * pgd_index() is used get the offset into the pgd page's array of pgd_t's; | |
57 | */ | |
58 | -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address))) | |
59 | +#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address))) | |
60 | +/* | |
61 | + * a shortcut to get a pgd_t in a given mm | |
62 | + */ | |
63 | +#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address)) | |
64 | /* | |
65 | * a shortcut which implies the use of the kernel's pgd, instead | |
66 | * of a process's | |
67 | diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h | |
68 | index 2160c1fee920..1ac15b03cf30 100644 | |
69 | --- a/arch/x86/include/asm/pgtable_64.h | |
70 | +++ b/arch/x86/include/asm/pgtable_64.h | |
71 | @@ -130,9 +130,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp) | |
72 | #endif | |
73 | } | |
74 | ||
75 | +#ifdef CONFIG_PAGE_TABLE_ISOLATION | |
76 | +/* | |
77 | + * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages | |
78 | + * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and | |
79 | + * the user one is in the last 4k. To switch between them, you | |
80 | + * just need to flip the 12th bit in their addresses. | |
81 | + */ | |
82 | +#define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT | |
83 | + | |
84 | +/* | |
85 | + * This generates better code than the inline assembly in | |
86 | + * __set_bit(). | |
87 | + */ | |
88 | +static inline void *ptr_set_bit(void *ptr, int bit) | |
89 | +{ | |
90 | + unsigned long __ptr = (unsigned long)ptr; | |
91 | + | |
92 | + __ptr |= BIT(bit); | |
93 | + return (void *)__ptr; | |
94 | +} | |
95 | +static inline void *ptr_clear_bit(void *ptr, int bit) | |
96 | +{ | |
97 | + unsigned long __ptr = (unsigned long)ptr; | |
98 | + | |
99 | + __ptr &= ~BIT(bit); | |
100 | + return (void *)__ptr; | |
101 | +} | |
102 | + | |
103 | +static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp) | |
104 | +{ | |
105 | + return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); | |
106 | +} | |
107 | + | |
108 | +static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp) | |
109 | +{ | |
110 | + return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT); | |
111 | +} | |
112 | + | |
113 | +static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp) | |
114 | +{ | |
115 | + return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); | |
116 | +} | |
117 | + | |
118 | +static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp) | |
119 | +{ | |
120 | + return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT); | |
121 | +} | |
122 | +#endif /* CONFIG_PAGE_TABLE_ISOLATION */ | |
123 | + | |
124 | +/* | |
125 | + * Page table pages are page-aligned. The lower half of the top | |
126 | + * level is used for userspace and the top half for the kernel. | |
127 | + * | |
128 | + * Returns true for parts of the PGD that map userspace and | |
129 | + * false for the parts that map the kernel. | |
130 | + */ | |
131 | +static inline bool pgdp_maps_userspace(void *__ptr) | |
132 | +{ | |
133 | + unsigned long ptr = (unsigned long)__ptr; | |
134 | + | |
135 | + return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2); | |
136 | +} | |
137 | + | |
138 | +#ifdef CONFIG_PAGE_TABLE_ISOLATION | |
139 | +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd); | |
140 | + | |
141 | +/* | |
142 | + * Take a PGD location (pgdp) and a pgd value that needs to be set there. | |
143 | + * Populates the user and returns the resulting PGD that must be set in | |
144 | + * the kernel copy of the page tables. | |
145 | + */ | |
146 | +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) | |
147 | +{ | |
148 | + if (!static_cpu_has(X86_FEATURE_PTI)) | |
149 | + return pgd; | |
150 | + return __pti_set_user_pgd(pgdp, pgd); | |
151 | +} | |
152 | +#else | |
153 | +static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) | |
154 | +{ | |
155 | + return pgd; | |
156 | +} | |
157 | +#endif | |
158 | + | |
159 | static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) | |
160 | { | |
161 | +#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) | |
162 | + p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); | |
163 | +#else | |
164 | *p4dp = p4d; | |
165 | +#endif | |
166 | } | |
167 | ||
168 | static inline void native_p4d_clear(p4d_t *p4d) | |
169 | @@ -146,7 +234,11 @@ static inline void native_p4d_clear(p4d_t *p4d) | |
170 | ||
171 | static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) | |
172 | { | |
173 | +#ifdef CONFIG_PAGE_TABLE_ISOLATION | |
174 | + *pgdp = pti_set_user_pgd(pgdp, pgd); | |
175 | +#else | |
176 | *pgdp = pgd; | |
177 | +#endif | |
178 | } | |
179 | ||
180 | static inline void native_pgd_clear(pgd_t *pgd) | |
181 | diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c | |
182 | index a13f6b109865..69a983365392 100644 | |
183 | --- a/arch/x86/mm/pti.c | |
184 | +++ b/arch/x86/mm/pti.c | |
185 | @@ -96,6 +96,47 @@ void __init pti_check_boottime_disable(void) | |
186 | setup_force_cpu_cap(X86_FEATURE_PTI); | |
187 | } | |
188 | ||
189 | +pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) | |
190 | +{ | |
191 | + /* | |
192 | + * Changes to the high (kernel) portion of the kernelmode page | |
193 | + * tables are not automatically propagated to the usermode tables. | |
194 | + * | |
195 | + * Users should keep in mind that, unlike the kernelmode tables, | |
196 | + * there is no vmalloc_fault equivalent for the usermode tables. | |
197 | + * Top-level entries added to init_mm's usermode pgd after boot | |
198 | + * will not be automatically propagated to other mms. | |
199 | + */ | |
200 | + if (!pgdp_maps_userspace(pgdp)) | |
201 | + return pgd; | |
202 | + | |
203 | + /* | |
204 | + * The user page tables get the full PGD, accessible from | |
205 | + * userspace: | |
206 | + */ | |
207 | + kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd; | |
208 | + | |
209 | + /* | |
210 | + * If this is normal user memory, make it NX in the kernel | |
211 | + * pagetables so that, if we somehow screw up and return to | |
212 | + * usermode with the kernel CR3 loaded, we'll get a page fault | |
213 | + * instead of allowing user code to execute with the wrong CR3. | |
214 | + * | |
215 | + * As exceptions, we don't set NX if: | |
216 | + * - _PAGE_USER is not set. This could be an executable | |
217 | + * EFI runtime mapping or something similar, and the kernel | |
218 | + * may execute from it | |
219 | + * - we don't have NX support | |
220 | + * - we're clearing the PGD (i.e. the new pgd is not present). | |
221 | + */ | |
222 | + if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) && | |
223 | + (__supported_pte_mask & _PAGE_NX)) | |
224 | + pgd.pgd |= _PAGE_NX; | |
225 | + | |
226 | + /* return the copy of the PGD we want the kernel to use: */ | |
227 | + return pgd; | |
228 | +} | |
229 | + | |
230 | /* | |
231 | * Initialize kernel page table isolation | |
232 | */ | |
233 | -- | |
234 | 2.14.2 | |
235 |