]>
Commit | Line | Data |
---|---|---|
59d5af67 | 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
321d628a FG |
2 | From: Andy Lutomirski <luto@kernel.org> |
3 | Date: Mon, 4 Dec 2017 15:07:20 +0100 | |
59d5af67 | 4 | Subject: [PATCH] x86/entry: Remap the TSS into the CPU entry area |
321d628a FG |
5 | MIME-Version: 1.0 |
6 | Content-Type: text/plain; charset=UTF-8 | |
7 | Content-Transfer-Encoding: 8bit | |
8 | ||
9 | CVE-2017-5754 | |
10 | ||
11 | This has a secondary purpose: it puts the entry stack into a region | |
12 | with a well-controlled layout. A subsequent patch will take | |
13 | advantage of this to streamline the SYSCALL entry code to be able to | |
14 | find it more easily. | |
15 | ||
16 | Signed-off-by: Andy Lutomirski <luto@kernel.org> | |
17 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> | |
18 | Reviewed-by: Thomas Gleixner <tglx@linutronix.de> | |
19 | Reviewed-by: Borislav Petkov <bpetkov@suse.de> | |
20 | Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> | |
21 | Cc: Borislav Petkov <bp@alien8.de> | |
22 | Cc: Brian Gerst <brgerst@gmail.com> | |
23 | Cc: Dave Hansen <dave.hansen@intel.com> | |
24 | Cc: Dave Hansen <dave.hansen@linux.intel.com> | |
25 | Cc: David Laight <David.Laight@aculab.com> | |
26 | Cc: Denys Vlasenko <dvlasenk@redhat.com> | |
27 | Cc: Eduardo Valentin <eduval@amazon.com> | |
28 | Cc: Greg KH <gregkh@linuxfoundation.org> | |
29 | Cc: H. Peter Anvin <hpa@zytor.com> | |
30 | Cc: Josh Poimboeuf <jpoimboe@redhat.com> | |
31 | Cc: Juergen Gross <jgross@suse.com> | |
32 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
33 | Cc: Peter Zijlstra <peterz@infradead.org> | |
34 | Cc: Rik van Riel <riel@redhat.com> | |
35 | Cc: Will Deacon <will.deacon@arm.com> | |
36 | Cc: aliguori@amazon.com | |
37 | Cc: daniel.gruss@iaik.tugraz.at | |
38 | Cc: hughd@google.com | |
39 | Cc: keescook@google.com | |
40 | Link: https://lkml.kernel.org/r/20171204150605.962042855@linutronix.de | |
41 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
42 | (cherry picked from commit 72f5e08dbba2d01aa90b592cf76c378ea233b00b) | |
43 | Signed-off-by: Andy Whitcroft <apw@canonical.com> | |
44 | Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> | |
45 | (cherry picked from commit 475b37e78defbc4cb91d54e2bcf18aa75611bb3a) | |
46 | Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> | |
47 | --- | |
48 | arch/x86/include/asm/fixmap.h | 7 +++++++ | |
49 | arch/x86/kernel/asm-offsets.c | 3 +++ | |
50 | arch/x86/kernel/cpu/common.c | 41 +++++++++++++++++++++++++++++++++++------ | |
51 | arch/x86/kernel/dumpstack.c | 3 ++- | |
52 | arch/x86/kvm/vmx.c | 2 +- | |
53 | arch/x86/power/cpu.c | 11 ++++++----- | |
54 | arch/x86/entry/entry_32.S | 6 ++++-- | |
55 | 7 files changed, 58 insertions(+), 15 deletions(-) | |
56 | ||
57 | diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h | |
58 | index 8c6ed66fe957..c92fc30e6def 100644 | |
59 | --- a/arch/x86/include/asm/fixmap.h | |
60 | +++ b/arch/x86/include/asm/fixmap.h | |
61 | @@ -54,6 +54,13 @@ extern unsigned long __FIXADDR_TOP; | |
62 | */ | |
63 | struct cpu_entry_area { | |
64 | char gdt[PAGE_SIZE]; | |
65 | + | |
66 | + /* | |
67 | + * The GDT is just below cpu_tss and thus serves (on x86_64) as a | |
68 | + * read-only guard page for the SYSENTER stack at the bottom | |
69 | + * of the TSS region. | |
70 | + */ | |
71 | + struct tss_struct tss; | |
72 | }; | |
73 | ||
74 | #define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE) | |
75 | diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c | |
76 | index 031bd35bd911..f765c3253ec3 100644 | |
77 | --- a/arch/x86/kernel/asm-offsets.c | |
78 | +++ b/arch/x86/kernel/asm-offsets.c | |
79 | @@ -97,4 +97,7 @@ void common(void) { | |
80 | OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack); | |
81 | /* Size of SYSENTER_stack */ | |
82 | DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack)); | |
83 | + | |
84 | + /* Layout info for cpu_entry_area */ | |
85 | + OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss); | |
86 | } | |
87 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c | |
88 | index e61eff11f562..4a38de4c6ede 100644 | |
89 | --- a/arch/x86/kernel/cpu/common.c | |
90 | +++ b/arch/x86/kernel/cpu/common.c | |
91 | @@ -466,6 +466,22 @@ void load_percpu_segment(int cpu) | |
92 | load_stack_canary_segment(); | |
93 | } | |
94 | ||
95 | +static void set_percpu_fixmap_pages(int fixmap_index, void *ptr, | |
96 | + int pages, pgprot_t prot) | |
97 | +{ | |
98 | + int i; | |
99 | + | |
100 | + for (i = 0; i < pages; i++) { | |
101 | + __set_fixmap(fixmap_index - i, | |
102 | + per_cpu_ptr_to_phys(ptr + i * PAGE_SIZE), prot); | |
103 | + } | |
104 | +} | |
105 | + | |
106 | +#ifdef CONFIG_X86_32 | |
107 | +/* The 32-bit entry code needs to find cpu_entry_area. */ | |
108 | +DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); | |
109 | +#endif | |
110 | + | |
111 | /* Setup the fixmap mappings only once per-processor */ | |
112 | static inline void setup_cpu_entry_area(int cpu) | |
113 | { | |
114 | @@ -507,7 +523,15 @@ static inline void setup_cpu_entry_area(int cpu) | |
115 | */ | |
116 | BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ | |
117 | offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); | |
118 | + BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); | |
119 | + set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss), | |
120 | + &per_cpu(cpu_tss, cpu), | |
121 | + sizeof(struct tss_struct) / PAGE_SIZE, | |
122 | + PAGE_KERNEL); | |
123 | ||
124 | +#ifdef CONFIG_X86_32 | |
125 | + this_cpu_write(cpu_entry_area, get_cpu_entry_area(cpu)); | |
126 | +#endif | |
127 | } | |
128 | ||
129 | /* Load the original GDT from the per-cpu structure */ | |
130 | @@ -1249,7 +1273,8 @@ void enable_sep_cpu(void) | |
131 | wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0); | |
132 | ||
133 | wrmsr(MSR_IA32_SYSENTER_ESP, | |
134 | - (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack), | |
135 | + (unsigned long)&get_cpu_entry_area(cpu)->tss + | |
136 | + offsetofend(struct tss_struct, SYSENTER_stack), | |
137 | 0); | |
138 | ||
139 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0); | |
140 | @@ -1371,6 +1396,8 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | |
141 | /* May not be marked __init: used by software suspend */ | |
142 | void syscall_init(void) | |
143 | { | |
144 | + int cpu = smp_processor_id(); | |
145 | + | |
146 | wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); | |
147 | wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); | |
148 | ||
149 | @@ -1384,7 +1411,7 @@ void syscall_init(void) | |
150 | */ | |
151 | wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); | |
152 | wrmsrl_safe(MSR_IA32_SYSENTER_ESP, | |
153 | - (unsigned long)this_cpu_ptr(&cpu_tss) + | |
154 | + (unsigned long)&get_cpu_entry_area(cpu)->tss + | |
155 | offsetofend(struct tss_struct, SYSENTER_stack)); | |
156 | wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); | |
157 | #else | |
158 | @@ -1593,11 +1620,13 @@ void cpu_init(void) | |
159 | BUG_ON(me->mm); | |
160 | enter_lazy_tlb(&init_mm, me); | |
161 | ||
162 | + setup_cpu_entry_area(cpu); | |
163 | + | |
164 | /* | |
165 | * Initialize the TSS. Don't bother initializing sp0, as the initial | |
166 | * task never enters user mode. | |
167 | */ | |
168 | - set_tss_desc(cpu, &t->x86_tss); | |
169 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); | |
170 | load_TR_desc(); | |
171 | ||
172 | load_mm_ldt(&init_mm); | |
173 | @@ -1610,7 +1639,6 @@ void cpu_init(void) | |
174 | if (is_uv_system()) | |
175 | uv_cpu_init(); | |
176 | ||
177 | - setup_cpu_entry_area(cpu); | |
178 | load_fixmap_gdt(cpu); | |
179 | } | |
180 | ||
181 | @@ -1650,11 +1678,13 @@ void cpu_init(void) | |
182 | BUG_ON(curr->mm); | |
183 | enter_lazy_tlb(&init_mm, curr); | |
184 | ||
185 | + setup_cpu_entry_area(cpu); | |
186 | + | |
187 | /* | |
188 | * Initialize the TSS. Don't bother initializing sp0, as the initial | |
189 | * task never enters user mode. | |
190 | */ | |
191 | - set_tss_desc(cpu, &t->x86_tss); | |
192 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); | |
193 | load_TR_desc(); | |
194 | ||
195 | load_mm_ldt(&init_mm); | |
196 | @@ -1671,7 +1701,6 @@ void cpu_init(void) | |
197 | ||
198 | fpu__init_cpu(); | |
199 | ||
200 | - setup_cpu_entry_area(cpu); | |
201 | load_fixmap_gdt(cpu); | |
202 | } | |
203 | #endif | |
204 | diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c | |
205 | index 0f4b931e1a02..c1f503673f1e 100644 | |
206 | --- a/arch/x86/kernel/dumpstack.c | |
207 | +++ b/arch/x86/kernel/dumpstack.c | |
208 | @@ -45,7 +45,8 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task, | |
209 | ||
210 | bool in_sysenter_stack(unsigned long *stack, struct stack_info *info) | |
211 | { | |
212 | - struct tss_struct *tss = this_cpu_ptr(&cpu_tss); | |
213 | + int cpu = smp_processor_id(); | |
214 | + struct tss_struct *tss = &get_cpu_entry_area(cpu)->tss; | |
215 | ||
216 | /* Treat the canary as part of the stack for unwinding purposes. */ | |
217 | void *begin = &tss->SYSENTER_stack_canary; | |
218 | diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c | |
219 | index a7c5a47beab7..d61986a36575 100644 | |
220 | --- a/arch/x86/kvm/vmx.c | |
221 | +++ b/arch/x86/kvm/vmx.c | |
222 | @@ -2280,7 +2280,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |
223 | * processors. See 22.2.4. | |
224 | */ | |
225 | vmcs_writel(HOST_TR_BASE, | |
226 | - (unsigned long)this_cpu_ptr(&cpu_tss.x86_tss)); | |
227 | + (unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss); | |
228 | vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */ | |
229 | ||
230 | /* | |
231 | diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c | |
232 | index 48cd87fc7222..2a717e023c9f 100644 | |
233 | --- a/arch/x86/power/cpu.c | |
234 | +++ b/arch/x86/power/cpu.c | |
235 | @@ -160,18 +160,19 @@ static void do_fpu_end(void) | |
236 | static void fix_processor_context(void) | |
237 | { | |
238 | int cpu = smp_processor_id(); | |
239 | - struct tss_struct *t = &per_cpu(cpu_tss, cpu); | |
240 | #ifdef CONFIG_X86_64 | |
241 | struct desc_struct *desc = get_cpu_gdt_rw(cpu); | |
242 | tss_desc tss; | |
243 | #endif | |
244 | ||
245 | /* | |
246 | - * This just modifies memory; should not be necessary. But... This is | |
247 | - * necessary, because 386 hardware has concept of busy TSS or some | |
248 | - * similar stupidity. | |
249 | + * We need to reload TR, which requires that we change the | |
250 | + * GDT entry to indicate "available" first. | |
251 | + * | |
252 | + * XXX: This could probably all be replaced by a call to | |
253 | + * force_reload_TR(). | |
254 | */ | |
255 | - set_tss_desc(cpu, &t->x86_tss); | |
256 | + set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); | |
257 | ||
258 | #ifdef CONFIG_X86_64 | |
259 | memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc)); | |
260 | diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S | |
261 | index 0092da1c056f..41e0e103f090 100644 | |
262 | --- a/arch/x86/entry/entry_32.S | |
263 | +++ b/arch/x86/entry/entry_32.S | |
264 | @@ -948,7 +948,8 @@ ENTRY(debug) | |
265 | movl %esp, %eax # pt_regs pointer | |
266 | ||
267 | /* Are we currently on the SYSENTER stack? */ | |
268 | - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) | |
269 | + movl PER_CPU_VAR(cpu_entry_area), %ecx | |
270 | + addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx | |
271 | subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ | |
272 | cmpl $SIZEOF_SYSENTER_stack, %ecx | |
273 | jb .Ldebug_from_sysenter_stack | |
274 | @@ -991,7 +992,8 @@ ENTRY(nmi) | |
275 | movl %esp, %eax # pt_regs pointer | |
276 | ||
277 | /* Are we currently on the SYSENTER stack? */ | |
278 | - PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx) | |
279 | + movl PER_CPU_VAR(cpu_entry_area), %ecx | |
280 | + addl $CPU_ENTRY_AREA_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx | |
281 | subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */ | |
282 | cmpl $SIZEOF_SYSENTER_stack, %ecx | |
283 | jb .Lnmi_from_sysenter_stack | |
284 | -- | |
285 | 2.14.2 | |
286 |