From c03a5cb44d69723a8a2aa0b3b4808d28ea749431 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 20 Dec 2017 18:28:54 +0100
Subject: [PATCH 183/241] x86/cpu_entry_area: Move it to a separate unit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Separate the cpu_entry_area code out of cpu/common.c and the fixmap.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 0fa11d2cd3d67af676aa2762ade282ba6d09cbe5)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/mm/Makefile                  |   2 +-
 arch/x86/include/asm/cpu_entry_area.h |  52 +++++++++++++++++
 arch/x86/include/asm/fixmap.h         |  41 +-------------
 arch/x86/kernel/cpu/common.c          |  94 ------------------------------
 arch/x86/kernel/traps.c               |   1 +
 arch/x86/mm/cpu_entry_area.c          | 104 ++++++++++++++++++++++++++++++++++
 6 files changed, 159 insertions(+), 135 deletions(-)
 create mode 100644 arch/x86/include/asm/cpu_entry_area.h
 create mode 100644 arch/x86/mm/cpu_entry_area.c

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 0fbdcb64f9f8..76f5399a8356 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@
 KCOV_INSTRUMENT_tlb.o	:= n
 
 obj-y	:= init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+	    pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..5471826803af
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code. Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+	char gdt[PAGE_SIZE];
+
+	/*
+	 * The GDT is just below entry_stack and thus serves (on x86_64) as
+	 * a a read-only guard page.
+	 */
+	struct entry_stack_page entry_stack_page;
+
+	/*
+	 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
+	 * we need task switches to work, and task switches write to the TSS.
+	 */
+	struct tss_struct tss;
+
+	char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Exception stacks used for IST entries.
+	 *
+	 * In the future, this should have a separate slot for each stack
+	 * with guard pages between them.
+	 */
+	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_PAGES	(CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+
+#endif
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index a7fb137ad964..1b2521473480 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -25,6 +25,7 @@
 #else
 #include <uapi/asm/vsyscall.h>
 #endif
+#include <asm/cpu_entry_area.h>
 
 /*
  * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
@@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP;
 			 PAGE_SIZE)
 #endif
 
-/*
- * cpu_entry_area is a percpu region in the fixmap that contains things
- * needed by the CPU and early entry/exit code. Real types aren't used
- * for all fields here to avoid circular header dependencies.
- *
- * Every field is a virtual alias of some other allocated backing store.
- * There is no direct allocation of a struct cpu_entry_area.
- */
-struct cpu_entry_area {
-	char gdt[PAGE_SIZE];
-
-	/*
-	 * The GDT is just below entry_stack and thus serves (on x86_64) as
-	 * a a read-only guard page.
-	 */
-	struct entry_stack_page entry_stack_page;
-
-	/*
-	 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
-	 * we need task switches to work, and task switches write to the TSS.
-	 */
-	struct tss_struct tss;
-
-	char entry_trampoline[PAGE_SIZE];
-
-#ifdef CONFIG_X86_64
-	/*
-	 * Exception stacks used for IST entries.
-	 *
-	 * In the future, this should have a separate slot for each stack
-	 * with guard pages between them.
-	 */
-	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
-#endif
-};
-
-#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
-
-extern void setup_cpu_entry_areas(void);
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7a8a5d436566..96171ce46d61 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
 	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
 	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
 };
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-#endif
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
-				   entry_stack_storage);
-
-static void __init
-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
-{
-	for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
-		__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
-}
-
-/* Setup the fixmap mappings only once per-processor */
-static void __init setup_cpu_entry_area(int cpu)
-{
-#ifdef CONFIG_X86_64
-	extern char _entry_trampoline[];
-
-	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
-	pgprot_t gdt_prot = PAGE_KERNEL_RO;
-	pgprot_t tss_prot = PAGE_KERNEL_RO;
-#else
-	/*
-	 * On native 32-bit systems, the GDT cannot be read-only because
-	 * our double fault handler uses a task gate, and entering through
-	 * a task gate needs to change an available TSS to busy. If the
-	 * GDT is read-only, that will triple fault. The TSS cannot be
-	 * read-only because the CPU writes to it on task switches.
-	 *
-	 * On Xen PV, the GDT must be read-only because the hypervisor
-	 * requires it.
-	 */
-	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-		PAGE_KERNEL_RO : PAGE_KERNEL;
-	pgprot_t tss_prot = PAGE_KERNEL;
-#endif
-
-	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
-				per_cpu_ptr(&entry_stack_storage, cpu), 1,
-				PAGE_KERNEL);
-
-	/*
-	 * The Intel SDM says (Volume 3, 7.2.1):
-	 *
-	 *  Avoid placing a page boundary in the part of the TSS that the
-	 *  processor reads during a task switch (the first 104 bytes). The
-	 *  processor may not correctly perform address translations if a
-	 *  boundary occurs in this area. During a task switch, the processor
-	 *  reads and writes into the first 104 bytes of each TSS (using
-	 *  contiguous physical addresses beginning with the physical address
-	 *  of the first byte of the TSS). So, after TSS access begins, if
-	 *  part of the 104 bytes is not physically contiguous, the processor
-	 *  will access incorrect information without generating a page-fault
-	 *  exception.
-	 *
-	 * There are also a lot of errata involving the TSS spanning a page
-	 * boundary. Assert that we're not doing that.
-	 */
-	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
-		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
-	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
-				&per_cpu(cpu_tss_rw, cpu),
-				sizeof(struct tss_struct) / PAGE_SIZE,
-				tss_prot);
-
-#ifdef CONFIG_X86_32
-	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
 #endif
 
-#ifdef CONFIG_X86_64
-	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
-	BUILD_BUG_ON(sizeof(exception_stacks) !=
-		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
-				&per_cpu(exception_stacks, cpu),
-				sizeof(exception_stacks) / PAGE_SIZE,
-				PAGE_KERNEL);
-
-	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
-		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
-#endif
-}
-
-void __init setup_cpu_entry_areas(void)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu)
-		setup_cpu_entry_area(cpu);
-}
-
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
 {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 14b462eefa17..ef2d1b8a0516 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -57,6 +57,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/mce.h>
 #include <asm/fixmap.h>
 #include <asm/mach_traps.h>
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..235ff9cfaaf4
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+static void __init
+set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+{
+	for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+		__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+	extern char _entry_trampoline[];
+
+	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+	pgprot_t gdt_prot = PAGE_KERNEL_RO;
+	pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+	/*
+	 * On native 32-bit systems, the GDT cannot be read-only because
+	 * our double fault handler uses a task gate, and entering through
+	 * a task gate needs to change an available TSS to busy. If the
+	 * GDT is read-only, that will triple fault. The TSS cannot be
+	 * read-only because the CPU writes to it on task switches.
+	 *
+	 * On Xen PV, the GDT must be read-only because the hypervisor
+	 * requires it.
+	 */
+	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+		PAGE_KERNEL_RO : PAGE_KERNEL;
+	pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+				per_cpu_ptr(&entry_stack_storage, cpu), 1,
+				PAGE_KERNEL);
+
+	/*
+	 * The Intel SDM says (Volume 3, 7.2.1):
+	 *
+	 *  Avoid placing a page boundary in the part of the TSS that the
+	 *  processor reads during a task switch (the first 104 bytes). The
+	 *  processor may not correctly perform address translations if a
+	 *  boundary occurs in this area. During a task switch, the processor
+	 *  reads and writes into the first 104 bytes of each TSS (using
+	 *  contiguous physical addresses beginning with the physical address
+	 *  of the first byte of the TSS). So, after TSS access begins, if
+	 *  part of the 104 bytes is not physically contiguous, the processor
+	 *  will access incorrect information without generating a page-fault
+	 *  exception.
+	 *
+	 * There are also a lot of errata involving the TSS spanning a page
+	 * boundary. Assert that we're not doing that.
+	 */
+	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+				&per_cpu(cpu_tss_rw, cpu),
+				sizeof(struct tss_struct) / PAGE_SIZE,
+				tss_prot);
+
+#ifdef CONFIG_X86_32
+	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+	BUILD_BUG_ON(sizeof(exception_stacks) !=
+		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+				&per_cpu(exception_stacks, cpu),
+				sizeof(exception_stacks) / PAGE_SIZE,
+				PAGE_KERNEL);
+
+	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		setup_cpu_entry_area(cpu);
+}
-- 
2.14.2

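The two BUILD_BUG_ON() assertions in setup_cpu_entry_area() carry the core
reasoning of this layout: every member of struct cpu_entry_area must cover
whole pages so set_percpu_fixmap_pages() can alias it page by page, and the
hardware-read part of the TSS must not cross a page boundary, per the Intel
SDM quote above. What follows is a minimal standalone C sketch of those two
checks; the struct layouts, sizes, and mock_* names are simplified stand-ins
for illustration, not the kernel's real definitions.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* One-past-the-end offset of member m, as defined in linux/stddef.h. */
#define offsetofend(T, m) (offsetof(T, m) + sizeof(((T *)0)->m))

struct mock_tss {
	char x86_tss[104];		/* part the CPU reads on a task switch */
	char padding[PAGE_SIZE - 104];	/* pad the struct to one full page */
};

struct mock_cpu_entry_area {
	char gdt[PAGE_SIZE];
	char entry_stack_page[PAGE_SIZE];
	struct mock_tss tss;
	char entry_trampoline[PAGE_SIZE];
	char exception_stacks[6 * PAGE_SIZE];	/* stand-in for the IST stacks */
};

int main(void)
{
	/*
	 * The XOR trick from the patch: if the start and one-past-end
	 * offsets of x86_tss fall in different pages, they differ in at
	 * least one bit covered by PAGE_MASK and the result is nonzero.
	 */
	assert(((offsetof(struct mock_tss, x86_tss) ^
		 offsetofend(struct mock_tss, x86_tss)) & PAGE_MASK) == 0);

	/* The area must be an exact number of pages for per-page aliasing. */
	assert(sizeof(struct mock_cpu_entry_area) % PAGE_SIZE == 0);

	printf("mock cpu_entry_area: %zu bytes = %zu pages\n",
	       sizeof(struct mock_cpu_entry_area),
	       sizeof(struct mock_cpu_entry_area) / PAGE_SIZE);
	return 0;
}

The XOR form is a branch-free way to compare the page numbers of two
offsets without dividing: masking the XOR with PAGE_MASK discards the
in-page bits, so any remaining set bit means the two offsets live in
different pages.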