]>
Commit | Line | Data |
---|---|---|
59d5af67 | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
321d628a FG |
2 | From: Thomas Gleixner <tglx@linutronix.de> |
3 | Date: Wed, 20 Dec 2017 18:28:54 +0100 | |
59d5af67 | 4 | Subject: [PATCH] x86/cpu_entry_area: Move it to a separate unit |
321d628a FG |
5 | MIME-Version: 1.0 |
6 | Content-Type: text/plain; charset=UTF-8 | |
7 | Content-Transfer-Encoding: 8bit | |
8 | ||
9 | CVE-2017-5754 | |
10 | ||
11 | Separate the cpu_entry_area code out of cpu/common.c and the fixmap. | |
12 | ||
13 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> | |
14 | Cc: Andy Lutomirski <luto@kernel.org> | |
15 | Cc: Borislav Petkov <bp@alien8.de> | |
16 | Cc: Dave Hansen <dave.hansen@linux.intel.com> | |
17 | Cc: H. Peter Anvin <hpa@zytor.com> | |
18 | Cc: Josh Poimboeuf <jpoimboe@redhat.com> | |
19 | Cc: Juergen Gross <jgross@suse.com> | |
20 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
21 | Cc: Peter Zijlstra <peterz@infradead.org> | |
22 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
23 | (cherry picked from commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255) | |
24 | Signed-off-by: Andy Whitcroft <apw@canonical.com> | |
25 | Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> | |
26 | (cherry picked from commit 0fa11d2cd3d67af676aa2762ade282ba6d09cbe5) | |
27 | Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> | |
28 | --- | |
29 | arch/x86/mm/Makefile | 2 +- | |
30 | arch/x86/include/asm/cpu_entry_area.h | 52 +++++++++++++++++ | |
31 | arch/x86/include/asm/fixmap.h | 41 +------------- | |
32 | arch/x86/kernel/cpu/common.c | 94 ------------------------------ | |
33 | arch/x86/kernel/traps.c | 1 + | |
34 | arch/x86/mm/cpu_entry_area.c | 104 ++++++++++++++++++++++++++++++++++ | |
35 | 6 files changed, 159 insertions(+), 135 deletions(-) | |
36 | create mode 100644 arch/x86/include/asm/cpu_entry_area.h | |
37 | create mode 100644 arch/x86/mm/cpu_entry_area.c | |
38 | ||
39 | diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile | |
40 | index 0fbdcb64f9f8..76f5399a8356 100644 | |
41 | --- a/arch/x86/mm/Makefile | |
42 | +++ b/arch/x86/mm/Makefile | |
43 | @@ -2,7 +2,7 @@ | |
44 | KCOV_INSTRUMENT_tlb.o := n | |
45 | ||
46 | obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | |
47 | - pat.o pgtable.o physaddr.o setup_nx.o tlb.o | |
48 | + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o | |
49 | ||
50 | # Make sure __phys_addr has no stackprotector | |
51 | nostackp := $(call cc-option, -fno-stack-protector) | |
52 | diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h | |
53 | new file mode 100644 | |
54 | index 000000000000..5471826803af | |
55 | --- /dev/null | |
56 | +++ b/arch/x86/include/asm/cpu_entry_area.h | |
57 | @@ -0,0 +1,52 @@ | |
58 | +// SPDX-License-Identifier: GPL-2.0 | |
59 | + | |
60 | +#ifndef _ASM_X86_CPU_ENTRY_AREA_H | |
61 | +#define _ASM_X86_CPU_ENTRY_AREA_H | |
62 | + | |
63 | +#include <linux/percpu-defs.h> | |
64 | +#include <asm/processor.h> | |
65 | + | |
66 | +/* | |
67 | + * cpu_entry_area is a percpu region that contains things needed by the CPU | |
68 | + * and early entry/exit code. Real types aren't used for all fields here | |
69 | + * to avoid circular header dependencies. | |
70 | + * | |
71 | + * Every field is a virtual alias of some other allocated backing store. | |
72 | + * There is no direct allocation of a struct cpu_entry_area. | |
73 | + */ | |
74 | +struct cpu_entry_area { | |
75 | + char gdt[PAGE_SIZE]; | |
76 | + | |
77 | + /* | |
78 | + * The GDT is just below entry_stack and thus serves (on x86_64) as | |
79 | + * a read-only guard page. | |
80 | + */ | |
81 | + struct entry_stack_page entry_stack_page; | |
82 | + | |
83 | + /* | |
84 | + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because | |
85 | + * we need task switches to work, and task switches write to the TSS. | |
86 | + */ | |
87 | + struct tss_struct tss; | |
88 | + | |
89 | + char entry_trampoline[PAGE_SIZE]; | |
90 | + | |
91 | +#ifdef CONFIG_X86_64 | |
92 | + /* | |
93 | + * Exception stacks used for IST entries. | |
94 | + * | |
95 | + * In the future, this should have a separate slot for each stack | |
96 | + * with guard pages between them. | |
97 | + */ | |
98 | + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; | |
99 | +#endif | |
100 | +}; | |
101 | + | |
102 | +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area)) | |
103 | +#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE) | |
104 | + | |
105 | +DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); | |
106 | + | |
107 | +extern void setup_cpu_entry_areas(void); | |
108 | + | |
109 | +#endif | |
110 | diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h | |
111 | index a7fb137ad964..1b2521473480 100644 | |
112 | --- a/arch/x86/include/asm/fixmap.h | |
113 | +++ b/arch/x86/include/asm/fixmap.h | |
114 | @@ -25,6 +25,7 @@ | |
115 | #else | |
116 | #include <uapi/asm/vsyscall.h> | |
117 | #endif | |
118 | +#include <asm/cpu_entry_area.h> | |
119 | ||
120 | /* | |
121 | * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall | |
122 | @@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP; | |
123 | PAGE_SIZE) | |
124 | #endif | |
125 | ||
126 | -/* | |
127 | - * cpu_entry_area is a percpu region in the fixmap that contains things | |
128 | - * needed by the CPU and early entry/exit code. Real types aren't used | |
129 | - * for all fields here to avoid circular header dependencies. | |
130 | - * | |
131 | - * Every field is a virtual alias of some other allocated backing store. | |
132 | - * There is no direct allocation of a struct cpu_entry_area. | |
133 | - */ | |
134 | -struct cpu_entry_area { | |
135 | - char gdt[PAGE_SIZE]; | |
136 | - | |
137 | - /* | |
138 | - * The GDT is just below entry_stack and thus serves (on x86_64) as | |
139 | - * a a read-only guard page. | |
140 | - */ | |
141 | - struct entry_stack_page entry_stack_page; | |
142 | - | |
143 | - /* | |
144 | - * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because | |
145 | - * we need task switches to work, and task switches write to the TSS. | |
146 | - */ | |
147 | - struct tss_struct tss; | |
148 | - | |
149 | - char entry_trampoline[PAGE_SIZE]; | |
150 | - | |
151 | -#ifdef CONFIG_X86_64 | |
152 | - /* | |
153 | - * Exception stacks used for IST entries. | |
154 | - * | |
155 | - * In the future, this should have a separate slot for each stack | |
156 | - * with guard pages between them. | |
157 | - */ | |
158 | - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]; | |
159 | -#endif | |
160 | -}; | |
161 | - | |
162 | -#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) | |
163 | - | |
164 | -extern void setup_cpu_entry_areas(void); | |
165 | - | |
166 | /* | |
167 | * Here we define all the compile-time 'special' virtual | |
168 | * addresses. The point is to have a constant address at | |
169 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c | |
170 | index 7a8a5d436566..96171ce46d61 100644 | |
171 | --- a/arch/x86/kernel/cpu/common.c | |
172 | +++ b/arch/x86/kernel/cpu/common.c | |
173 | @@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | |
174 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, | |
175 | [DEBUG_STACK - 1] = DEBUG_STKSZ | |
176 | }; | |
177 | - | |
178 | -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | |
179 | - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); | |
180 | -#endif | |
181 | - | |
182 | -static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, | |
183 | - entry_stack_storage); | |
184 | - | |
185 | -static void __init | |
186 | -set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) | |
187 | -{ | |
188 | - for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) | |
189 | - __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); | |
190 | -} | |
191 | - | |
192 | -/* Setup the fixmap mappings only once per-processor */ | |
193 | -static void __init setup_cpu_entry_area(int cpu) | |
194 | -{ | |
195 | -#ifdef CONFIG_X86_64 | |
196 | - extern char _entry_trampoline[]; | |
197 | - | |
198 | - /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ | |
199 | - pgprot_t gdt_prot = PAGE_KERNEL_RO; | |
200 | - pgprot_t tss_prot = PAGE_KERNEL_RO; | |
201 | -#else | |
202 | - /* | |
203 | - * On native 32-bit systems, the GDT cannot be read-only because | |
204 | - * our double fault handler uses a task gate, and entering through | |
205 | - * a task gate needs to change an available TSS to busy. If the | |
206 | - * GDT is read-only, that will triple fault. The TSS cannot be | |
207 | - * read-only because the CPU writes to it on task switches. | |
208 | - * | |
209 | - * On Xen PV, the GDT must be read-only because the hypervisor | |
210 | - * requires it. | |
211 | - */ | |
212 | - pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? | |
213 | - PAGE_KERNEL_RO : PAGE_KERNEL; | |
214 | - pgprot_t tss_prot = PAGE_KERNEL; | |
215 | -#endif | |
216 | - | |
217 | - __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); | |
218 | - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), | |
219 | - per_cpu_ptr(&entry_stack_storage, cpu), 1, | |
220 | - PAGE_KERNEL); | |
221 | - | |
222 | - /* | |
223 | - * The Intel SDM says (Volume 3, 7.2.1): | |
224 | - * | |
225 | - * Avoid placing a page boundary in the part of the TSS that the | |
226 | - * processor reads during a task switch (the first 104 bytes). The | |
227 | - * processor may not correctly perform address translations if a | |
228 | - * boundary occurs in this area. During a task switch, the processor | |
229 | - * reads and writes into the first 104 bytes of each TSS (using | |
230 | - * contiguous physical addresses beginning with the physical address | |
231 | - * of the first byte of the TSS). So, after TSS access begins, if | |
232 | - * part of the 104 bytes is not physically contiguous, the processor | |
233 | - * will access incorrect information without generating a page-fault | |
234 | - * exception. | |
235 | - * | |
236 | - * There are also a lot of errata involving the TSS spanning a page | |
237 | - * boundary. Assert that we're not doing that. | |
238 | - */ | |
239 | - BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ | |
240 | - offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); | |
241 | - BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); | |
242 | - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), | |
243 | - &per_cpu(cpu_tss_rw, cpu), | |
244 | - sizeof(struct tss_struct) / PAGE_SIZE, | |
245 | - tss_prot); | |
246 | - | |
247 | -#ifdef CONFIG_X86_32 | |
248 | - per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); | |
249 | #endif | |
250 | ||
251 | -#ifdef CONFIG_X86_64 | |
252 | - BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); | |
253 | - BUILD_BUG_ON(sizeof(exception_stacks) != | |
254 | - sizeof(((struct cpu_entry_area *)0)->exception_stacks)); | |
255 | - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), | |
256 | - &per_cpu(exception_stacks, cpu), | |
257 | - sizeof(exception_stacks) / PAGE_SIZE, | |
258 | - PAGE_KERNEL); | |
259 | - | |
260 | - __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), | |
261 | - __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); | |
262 | -#endif | |
263 | -} | |
264 | - | |
265 | -void __init setup_cpu_entry_areas(void) | |
266 | -{ | |
267 | - unsigned int cpu; | |
268 | - | |
269 | - for_each_possible_cpu(cpu) | |
270 | - setup_cpu_entry_area(cpu); | |
271 | -} | |
272 | - | |
273 | /* Load the original GDT from the per-cpu structure */ | |
274 | void load_direct_gdt(int cpu) | |
275 | { | |
276 | diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c | |
277 | index 14b462eefa17..ef2d1b8a0516 100644 | |
278 | --- a/arch/x86/kernel/traps.c | |
279 | +++ b/arch/x86/kernel/traps.c | |
280 | @@ -57,6 +57,7 @@ | |
281 | #include <asm/traps.h> | |
282 | #include <asm/desc.h> | |
283 | #include <asm/fpu/internal.h> | |
284 | +#include <asm/cpu_entry_area.h> | |
285 | #include <asm/mce.h> | |
286 | #include <asm/fixmap.h> | |
287 | #include <asm/mach_traps.h> | |
288 | diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c | |
289 | new file mode 100644 | |
290 | index 000000000000..235ff9cfaaf4 | |
291 | --- /dev/null | |
292 | +++ b/arch/x86/mm/cpu_entry_area.c | |
293 | @@ -0,0 +1,104 @@ | |
294 | +// SPDX-License-Identifier: GPL-2.0 | |
295 | + | |
296 | +#include <linux/spinlock.h> | |
297 | +#include <linux/percpu.h> | |
298 | + | |
299 | +#include <asm/cpu_entry_area.h> | |
300 | +#include <asm/pgtable.h> | |
301 | +#include <asm/fixmap.h> | |
302 | +#include <asm/desc.h> | |
303 | + | |
304 | +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage); | |
305 | + | |
306 | +#ifdef CONFIG_X86_64 | |
307 | +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | |
308 | + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); | |
309 | +#endif | |
310 | + | |
311 | +static void __init | |
312 | +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot) | |
313 | +{ | |
314 | + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE) | |
315 | + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot); | |
316 | +} | |
317 | + | |
318 | +/* Set up the fixmap mappings only once per-processor */ | |
319 | +static void __init setup_cpu_entry_area(int cpu) | |
320 | +{ | |
321 | +#ifdef CONFIG_X86_64 | |
322 | + extern char _entry_trampoline[]; | |
323 | + | |
324 | + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */ | |
325 | + pgprot_t gdt_prot = PAGE_KERNEL_RO; | |
326 | + pgprot_t tss_prot = PAGE_KERNEL_RO; | |
327 | +#else | |
328 | + /* | |
329 | + * On native 32-bit systems, the GDT cannot be read-only because | |
330 | + * our double fault handler uses a task gate, and entering through | |
331 | + * a task gate needs to change an available TSS to busy. If the | |
332 | + * GDT is read-only, that will triple fault. The TSS cannot be | |
333 | + * read-only because the CPU writes to it on task switches. | |
334 | + * | |
335 | + * On Xen PV, the GDT must be read-only because the hypervisor | |
336 | + * requires it. | |
337 | + */ | |
338 | + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ? | |
339 | + PAGE_KERNEL_RO : PAGE_KERNEL; | |
340 | + pgprot_t tss_prot = PAGE_KERNEL; | |
341 | +#endif | |
342 | + | |
343 | + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot); | |
344 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page), | |
345 | + per_cpu_ptr(&entry_stack_storage, cpu), 1, | |
346 | + PAGE_KERNEL); | |
347 | + | |
348 | + /* | |
349 | + * The Intel SDM says (Volume 3, 7.2.1): | |
350 | + * | |
351 | + * Avoid placing a page boundary in the part of the TSS that the | |
352 | + * processor reads during a task switch (the first 104 bytes). The | |
353 | + * processor may not correctly perform address translations if a | |
354 | + * boundary occurs in this area. During a task switch, the processor | |
355 | + * reads and writes into the first 104 bytes of each TSS (using | |
356 | + * contiguous physical addresses beginning with the physical address | |
357 | + * of the first byte of the TSS). So, after TSS access begins, if | |
358 | + * part of the 104 bytes is not physically contiguous, the processor | |
359 | + * will access incorrect information without generating a page-fault | |
360 | + * exception. | |
361 | + * | |
362 | + * There are also a lot of errata involving the TSS spanning a page | |
363 | + * boundary. Assert that we're not doing that. | |
364 | + */ | |
365 | + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ | |
366 | + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); | |
367 | + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); | |
368 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), | |
369 | + &per_cpu(cpu_tss_rw, cpu), | |
370 | + sizeof(struct tss_struct) / PAGE_SIZE, | |
371 | + tss_prot); | |
372 | + | |
373 | +#ifdef CONFIG_X86_32 | |
374 | + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu); | |
375 | +#endif | |
376 | + | |
377 | +#ifdef CONFIG_X86_64 | |
378 | + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0); | |
379 | + BUILD_BUG_ON(sizeof(exception_stacks) != | |
380 | + sizeof(((struct cpu_entry_area *)0)->exception_stacks)); | |
381 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks), | |
382 | + &per_cpu(exception_stacks, cpu), | |
383 | + sizeof(exception_stacks) / PAGE_SIZE, | |
384 | + PAGE_KERNEL); | |
385 | + | |
386 | + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline), | |
387 | + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX); | |
388 | +#endif | |
389 | +} | |
390 | + | |
391 | +void __init setup_cpu_entry_areas(void) | |
392 | +{ | |
393 | + unsigned int cpu; | |
394 | + | |
395 | + for_each_possible_cpu(cpu) | |
396 | + setup_cpu_entry_area(cpu); | |
397 | +} | |
398 | -- | |
399 | 2.14.2 | |
400 |