1 From c03a5cb44d69723a8a2aa0b3b4808d28ea749431 Mon Sep 17 00:00:00 2001
2 From: Thomas Gleixner <tglx@linutronix.de>
3 Date: Wed, 20 Dec 2017 18:28:54 +0100
4 Subject: [PATCH 183/233] x86/cpu_entry_area: Move it to a separate unit
5 MIME-Version: 1.0
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
8
9 CVE-2017-5754
10
11 Separate the cpu_entry_area code out of cpu/common.c and the fixmap.
12
13 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
14 Cc: Andy Lutomirski <luto@kernel.org>
15 Cc: Borislav Petkov <bp@alien8.de>
16 Cc: Dave Hansen <dave.hansen@linux.intel.com>
17 Cc: H. Peter Anvin <hpa@zytor.com>
18 Cc: Josh Poimboeuf <jpoimboe@redhat.com>
19 Cc: Juergen Gross <jgross@suse.com>
20 Cc: Linus Torvalds <torvalds@linux-foundation.org>
21 Cc: Peter Zijlstra <peterz@infradead.org>
22 Signed-off-by: Ingo Molnar <mingo@kernel.org>
23 (cherry picked from commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255)
24 Signed-off-by: Andy Whitcroft <apw@canonical.com>
25 Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
26 (cherry picked from commit 0fa11d2cd3d67af676aa2762ade282ba6d09cbe5)
27 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
28 ---
29 arch/x86/mm/Makefile | 2 +-
30 arch/x86/include/asm/cpu_entry_area.h | 52 +++++++++++++++++
31 arch/x86/include/asm/fixmap.h | 41 +-------------
32 arch/x86/kernel/cpu/common.c | 94 ------------------------------
33 arch/x86/kernel/traps.c | 1 +
34 arch/x86/mm/cpu_entry_area.c | 104 ++++++++++++++++++++++++++++++++++
35 6 files changed, 159 insertions(+), 135 deletions(-)
36 create mode 100644 arch/x86/include/asm/cpu_entry_area.h
37 create mode 100644 arch/x86/mm/cpu_entry_area.c
38
39 diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
40 index 0fbdcb64f9f8..76f5399a8356 100644
41 --- a/arch/x86/mm/Makefile
42 +++ b/arch/x86/mm/Makefile
43 @@ -2,7 +2,7 @@
44 KCOV_INSTRUMENT_tlb.o := n
45
46 obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
47 - pat.o pgtable.o physaddr.o setup_nx.o tlb.o
48 + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
49
50 # Make sure __phys_addr has no stackprotector
51 nostackp := $(call cc-option, -fno-stack-protector)
52 diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
53 new file mode 100644
54 index 000000000000..5471826803af
55 --- /dev/null
56 +++ b/arch/x86/include/asm/cpu_entry_area.h
57 @@ -0,0 +1,52 @@
58 +// SPDX-License-Identifier: GPL-2.0
59 +
60 +#ifndef _ASM_X86_CPU_ENTRY_AREA_H
61 +#define _ASM_X86_CPU_ENTRY_AREA_H
62 +
63 +#include <linux/percpu-defs.h>
64 +#include <asm/processor.h>
65 +
66 +/*
67 + * cpu_entry_area is a percpu region that contains things needed by the CPU
68 + * and early entry/exit code. Real types aren't used for all fields here
69 + * to avoid circular header dependencies.
70 + *
71 + * Every field is a virtual alias of some other allocated backing store.
72 + * There is no direct allocation of a struct cpu_entry_area.
73 + */
74 +struct cpu_entry_area {
75 + char gdt[PAGE_SIZE];
76 +
77 + /*
78 + * The GDT is just below entry_stack and thus serves (on x86_64) as
79 + * a a read-only guard page.
80 + */
81 + struct entry_stack_page entry_stack_page;
82 +
83 + /*
84 + * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
85 + * we need task switches to work, and task switches write to the TSS.
86 + */
87 + struct tss_struct tss;
88 +
89 + char entry_trampoline[PAGE_SIZE];
90 +
91 +#ifdef CONFIG_X86_64
92 + /*
93 + * Exception stacks used for IST entries.
94 + *
95 + * In the future, this should have a separate slot for each stack
96 + * with guard pages between them.
97 + */
98 + char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
99 +#endif
100 +};
101 +
102 +#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
103 +#define CPU_ENTRY_AREA_PAGES (CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
104 +
105 +DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
106 +
107 +extern void setup_cpu_entry_areas(void);
108 +
109 +#endif
110 diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
111 index a7fb137ad964..1b2521473480 100644
112 --- a/arch/x86/include/asm/fixmap.h
113 +++ b/arch/x86/include/asm/fixmap.h
114 @@ -25,6 +25,7 @@
115 #else
116 #include <uapi/asm/vsyscall.h>
117 #endif
118 +#include <asm/cpu_entry_area.h>
119
120 /*
121 * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
122 @@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP;
123 PAGE_SIZE)
124 #endif
125
126 -/*
127 - * cpu_entry_area is a percpu region in the fixmap that contains things
128 - * needed by the CPU and early entry/exit code. Real types aren't used
129 - * for all fields here to avoid circular header dependencies.
130 - *
131 - * Every field is a virtual alias of some other allocated backing store.
132 - * There is no direct allocation of a struct cpu_entry_area.
133 - */
134 -struct cpu_entry_area {
135 - char gdt[PAGE_SIZE];
136 -
137 - /*
138 - * The GDT is just below entry_stack and thus serves (on x86_64) as
139 - * a a read-only guard page.
140 - */
141 - struct entry_stack_page entry_stack_page;
142 -
143 - /*
144 - * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
145 - * we need task switches to work, and task switches write to the TSS.
146 - */
147 - struct tss_struct tss;
148 -
149 - char entry_trampoline[PAGE_SIZE];
150 -
151 -#ifdef CONFIG_X86_64
152 - /*
153 - * Exception stacks used for IST entries.
154 - *
155 - * In the future, this should have a separate slot for each stack
156 - * with guard pages between them.
157 - */
158 - char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
159 -#endif
160 -};
161 -
162 -#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
163 -
164 -extern void setup_cpu_entry_areas(void);
165 -
166 /*
167 * Here we define all the compile-time 'special' virtual
168 * addresses. The point is to have a constant address at
169 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
170 index 7a8a5d436566..96171ce46d61 100644
171 --- a/arch/x86/kernel/cpu/common.c
172 +++ b/arch/x86/kernel/cpu/common.c
173 @@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
174 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
175 [DEBUG_STACK - 1] = DEBUG_STKSZ
176 };
177 -
178 -static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
179 - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
180 -#endif
181 -
182 -static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
183 - entry_stack_storage);
184 -
185 -static void __init
186 -set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
187 -{
188 - for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
189 - __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
190 -}
191 -
192 -/* Setup the fixmap mappings only once per-processor */
193 -static void __init setup_cpu_entry_area(int cpu)
194 -{
195 -#ifdef CONFIG_X86_64
196 - extern char _entry_trampoline[];
197 -
198 - /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
199 - pgprot_t gdt_prot = PAGE_KERNEL_RO;
200 - pgprot_t tss_prot = PAGE_KERNEL_RO;
201 -#else
202 - /*
203 - * On native 32-bit systems, the GDT cannot be read-only because
204 - * our double fault handler uses a task gate, and entering through
205 - * a task gate needs to change an available TSS to busy. If the
206 - * GDT is read-only, that will triple fault. The TSS cannot be
207 - * read-only because the CPU writes to it on task switches.
208 - *
209 - * On Xen PV, the GDT must be read-only because the hypervisor
210 - * requires it.
211 - */
212 - pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
213 - PAGE_KERNEL_RO : PAGE_KERNEL;
214 - pgprot_t tss_prot = PAGE_KERNEL;
215 -#endif
216 -
217 - __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
218 - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
219 - per_cpu_ptr(&entry_stack_storage, cpu), 1,
220 - PAGE_KERNEL);
221 -
222 - /*
223 - * The Intel SDM says (Volume 3, 7.2.1):
224 - *
225 - * Avoid placing a page boundary in the part of the TSS that the
226 - * processor reads during a task switch (the first 104 bytes). The
227 - * processor may not correctly perform address translations if a
228 - * boundary occurs in this area. During a task switch, the processor
229 - * reads and writes into the first 104 bytes of each TSS (using
230 - * contiguous physical addresses beginning with the physical address
231 - * of the first byte of the TSS). So, after TSS access begins, if
232 - * part of the 104 bytes is not physically contiguous, the processor
233 - * will access incorrect information without generating a page-fault
234 - * exception.
235 - *
236 - * There are also a lot of errata involving the TSS spanning a page
237 - * boundary. Assert that we're not doing that.
238 - */
239 - BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
240 - offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
241 - BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
242 - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
243 - &per_cpu(cpu_tss_rw, cpu),
244 - sizeof(struct tss_struct) / PAGE_SIZE,
245 - tss_prot);
246 -
247 -#ifdef CONFIG_X86_32
248 - per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
249 #endif
250
251 -#ifdef CONFIG_X86_64
252 - BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
253 - BUILD_BUG_ON(sizeof(exception_stacks) !=
254 - sizeof(((struct cpu_entry_area *)0)->exception_stacks));
255 - set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
256 - &per_cpu(exception_stacks, cpu),
257 - sizeof(exception_stacks) / PAGE_SIZE,
258 - PAGE_KERNEL);
259 -
260 - __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
261 - __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
262 -#endif
263 -}
264 -
265 -void __init setup_cpu_entry_areas(void)
266 -{
267 - unsigned int cpu;
268 -
269 - for_each_possible_cpu(cpu)
270 - setup_cpu_entry_area(cpu);
271 -}
272 -
273 /* Load the original GDT from the per-cpu structure */
274 void load_direct_gdt(int cpu)
275 {
276 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
277 index 14b462eefa17..ef2d1b8a0516 100644
278 --- a/arch/x86/kernel/traps.c
279 +++ b/arch/x86/kernel/traps.c
280 @@ -57,6 +57,7 @@
281 #include <asm/traps.h>
282 #include <asm/desc.h>
283 #include <asm/fpu/internal.h>
284 +#include <asm/cpu_entry_area.h>
285 #include <asm/mce.h>
286 #include <asm/fixmap.h>
287 #include <asm/mach_traps.h>
288 diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
289 new file mode 100644
290 index 000000000000..235ff9cfaaf4
291 --- /dev/null
292 +++ b/arch/x86/mm/cpu_entry_area.c
293 @@ -0,0 +1,104 @@
294 +// SPDX-License-Identifier: GPL-2.0
295 +
296 +#include <linux/spinlock.h>
297 +#include <linux/percpu.h>
298 +
299 +#include <asm/cpu_entry_area.h>
300 +#include <asm/pgtable.h>
301 +#include <asm/fixmap.h>
302 +#include <asm/desc.h>
303 +
304 +static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
305 +
306 +#ifdef CONFIG_X86_64
307 +static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
308 + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
309 +#endif
310 +
311 +static void __init
312 +set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
313 +{
314 + for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
315 + __set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
316 +}
317 +
318 +/* Setup the fixmap mappings only once per-processor */
319 +static void __init setup_cpu_entry_area(int cpu)
320 +{
321 +#ifdef CONFIG_X86_64
322 + extern char _entry_trampoline[];
323 +
324 + /* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
325 + pgprot_t gdt_prot = PAGE_KERNEL_RO;
326 + pgprot_t tss_prot = PAGE_KERNEL_RO;
327 +#else
328 + /*
329 + * On native 32-bit systems, the GDT cannot be read-only because
330 + * our double fault handler uses a task gate, and entering through
331 + * a task gate needs to change an available TSS to busy. If the
332 + * GDT is read-only, that will triple fault. The TSS cannot be
333 + * read-only because the CPU writes to it on task switches.
334 + *
335 + * On Xen PV, the GDT must be read-only because the hypervisor
336 + * requires it.
337 + */
338 + pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
339 + PAGE_KERNEL_RO : PAGE_KERNEL;
340 + pgprot_t tss_prot = PAGE_KERNEL;
341 +#endif
342 +
343 + __set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
344 + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
345 + per_cpu_ptr(&entry_stack_storage, cpu), 1,
346 + PAGE_KERNEL);
347 +
348 + /*
349 + * The Intel SDM says (Volume 3, 7.2.1):
350 + *
351 + * Avoid placing a page boundary in the part of the TSS that the
352 + * processor reads during a task switch (the first 104 bytes). The
353 + * processor may not correctly perform address translations if a
354 + * boundary occurs in this area. During a task switch, the processor
355 + * reads and writes into the first 104 bytes of each TSS (using
356 + * contiguous physical addresses beginning with the physical address
357 + * of the first byte of the TSS). So, after TSS access begins, if
358 + * part of the 104 bytes is not physically contiguous, the processor
359 + * will access incorrect information without generating a page-fault
360 + * exception.
361 + *
362 + * There are also a lot of errata involving the TSS spanning a page
363 + * boundary. Assert that we're not doing that.
364 + */
365 + BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
366 + offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
367 + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
368 + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
369 + &per_cpu(cpu_tss_rw, cpu),
370 + sizeof(struct tss_struct) / PAGE_SIZE,
371 + tss_prot);
372 +
373 +#ifdef CONFIG_X86_32
374 + per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
375 +#endif
376 +
377 +#ifdef CONFIG_X86_64
378 + BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
379 + BUILD_BUG_ON(sizeof(exception_stacks) !=
380 + sizeof(((struct cpu_entry_area *)0)->exception_stacks));
381 + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
382 + &per_cpu(exception_stacks, cpu),
383 + sizeof(exception_stacks) / PAGE_SIZE,
384 + PAGE_KERNEL);
385 +
386 + __set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
387 + __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
388 +#endif
389 +}
390 +
391 +void __init setup_cpu_entry_areas(void)
392 +{
393 + unsigned int cpu;
394 +
395 + for_each_possible_cpu(cpu)
396 + setup_cpu_entry_area(cpu);
397 +}
398 --
399 2.14.2
400