]>
Commit | Line | Data |
---|---|---|
321d628a FG |
1 | From d9fd6653e5dd9d80c7c75065329250529281e02d Mon Sep 17 00:00:00 2001 |
2 | From: Andy Lutomirski <luto@kernel.org> | |
3 | Date: Sun, 10 Sep 2017 17:48:27 -0700 | |
7c7389df | 4 | Subject: [PATCH 032/232] x86/mm/64: Initialize CR4.PCIDE early |
321d628a FG |
5 | MIME-Version: 1.0 |
6 | Content-Type: text/plain; charset=UTF-8 | |
7 | Content-Transfer-Encoding: 8bit | |
8 | ||
9 | CVE-2017-5754 | |
10 | ||
11 | cpu_init() is weird: it's called rather late (after early | |
12 | identification and after most MMU state is initialized) on the boot | |
13 | CPU but is called extremely early (before identification) on secondary | |
14 | CPUs. It's called just late enough on the boot CPU that its CR4 value | |
15 | isn't propagated to mmu_cr4_features. | |
16 | ||
17 | Even if we put CR4.PCIDE into mmu_cr4_features, we'd hit two | |
18 | problems. First, we'd crash in the trampoline code. That's | |
19 | fixable, and I tried that. It turns out that mmu_cr4_features is | |
20 | totally ignored by secondary_start_64(), though, so even with the | |
21 | trampoline code fixed, it wouldn't help. | |
22 | ||
23 | This means that we don't currently have CR4.PCIDE reliably initialized | |
24 | before we start playing with cpu_tlbstate. This is very fragile and | |
25 | tends to cause boot failures if I make even small changes to the TLB | |
26 | handling code. | |
27 | ||
28 | Make it more robust: initialize CR4.PCIDE earlier on the boot CPU | |
29 | and propagate it to secondary CPUs in start_secondary(). | |
30 | ||
31 | ( Yes, this is ugly. I think we should have improved mmu_cr4_features | |
32 | to actually control CR4 during secondary bootup, but that would be | |
33 | fairly intrusive at this stage. ) | |
34 | ||
35 | Signed-off-by: Andy Lutomirski <luto@kernel.org> | |
36 | Reported-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com> | |
37 | Tested-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com> | |
38 | Cc: Borislav Petkov <bpetkov@suse.de> | |
39 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
40 | Cc: Peter Zijlstra <peterz@infradead.org> | |
41 | Cc: Thomas Gleixner <tglx@linutronix.de> | |
42 | Cc: linux-kernel@vger.kernel.org | |
43 | Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems") | |
44 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
45 | (cherry picked from commit c7ad5ad297e644601747d6dbee978bf85e14f7bc) | |
46 | Signed-off-by: Andy Whitcroft <apw@canonical.com> | |
47 | Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> | |
48 | (cherry picked from commit 0e6a37a43aa876327e7d21881c09977da2d5c270) | |
49 | Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> | |
50 | --- | |
51 | arch/x86/kernel/cpu/common.c | 49 +++++++------------------------------------- | |
52 | arch/x86/kernel/setup.c | 5 ++++- | |
53 | arch/x86/kernel/smpboot.c | 8 +++++--- | |
54 | arch/x86/mm/init.c | 34 ++++++++++++++++++++++++++++++ | |
55 | 4 files changed, 50 insertions(+), 46 deletions(-) | |
56 | ||
57 | diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c | |
58 | index 0b80ed14ff52..4be7b209a3d6 100644 | |
59 | --- a/arch/x86/kernel/cpu/common.c | |
60 | +++ b/arch/x86/kernel/cpu/common.c | |
61 | @@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s) | |
62 | __setup("nompx", x86_mpx_setup); | |
63 | ||
64 | #ifdef CONFIG_X86_64 | |
65 | -static int __init x86_pcid_setup(char *s) | |
66 | +static int __init x86_nopcid_setup(char *s) | |
67 | { | |
68 | - /* require an exact match without trailing characters */ | |
69 | - if (strlen(s)) | |
70 | - return 0; | |
71 | + /* nopcid doesn't accept parameters */ | |
72 | + if (s) | |
73 | + return -EINVAL; | |
74 | ||
75 | /* do not emit a message if the feature is not present */ | |
76 | if (!boot_cpu_has(X86_FEATURE_PCID)) | |
77 | - return 1; | |
78 | + return 0; | |
79 | ||
80 | setup_clear_cpu_cap(X86_FEATURE_PCID); | |
81 | pr_info("nopcid: PCID feature disabled\n"); | |
82 | - return 1; | |
83 | + return 0; | |
84 | } | |
85 | -__setup("nopcid", x86_pcid_setup); | |
86 | +early_param("nopcid", x86_nopcid_setup); | |
87 | #endif | |
88 | ||
89 | static int __init x86_noinvpcid_setup(char *s) | |
90 | @@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) | |
91 | } | |
92 | } | |
93 | ||
94 | -static void setup_pcid(struct cpuinfo_x86 *c) | |
95 | -{ | |
96 | - if (cpu_has(c, X86_FEATURE_PCID)) { | |
97 | - if (cpu_has(c, X86_FEATURE_PGE)) { | |
98 | - /* | |
99 | - * We'd like to use cr4_set_bits_and_update_boot(), | |
100 | - * but we can't. CR4.PCIDE is special and can only | |
101 | - * be set in long mode, and the early CPU init code | |
102 | - * doesn't know this and would try to restore CR4.PCIDE | |
103 | - * prior to entering long mode. | |
104 | - * | |
105 | - * Instead, we rely on the fact that hotplug, resume, | |
106 | - * etc all fully restore CR4 before they write anything | |
107 | - * that could have nonzero PCID bits to CR3. CR4.PCIDE | |
108 | - * has no effect on the page tables themselves, so we | |
109 | - * don't need it to be restored early. | |
110 | - */ | |
111 | - cr4_set_bits(X86_CR4_PCIDE); | |
112 | - } else { | |
113 | - /* | |
114 | - * flush_tlb_all(), as currently implemented, won't | |
115 | - * work if PCID is on but PGE is not. Since that | |
116 | - * combination doesn't exist on real hardware, there's | |
117 | - * no reason to try to fully support it, but it's | |
118 | - * polite to avoid corrupting data if we're on | |
119 | - * an improperly configured VM. | |
120 | - */ | |
121 | - clear_cpu_cap(c, X86_FEATURE_PCID); | |
122 | - } | |
123 | - } | |
124 | -} | |
125 | - | |
126 | /* | |
127 | * Protection Keys are not available in 32-bit mode. | |
128 | */ | |
129 | @@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) | |
130 | setup_smep(c); | |
131 | setup_smap(c); | |
132 | ||
133 | - /* Set up PCID */ | |
134 | - setup_pcid(c); | |
135 | - | |
136 | /* | |
137 | * The vendor-specific functions might have changed features. | |
138 | * Now we do "generic changes." | |
139 | diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c | |
140 | index d7e8b983aa72..f964bfddfefd 100644 | |
141 | --- a/arch/x86/kernel/setup.c | |
142 | +++ b/arch/x86/kernel/setup.c | |
143 | @@ -1174,8 +1174,11 @@ void __init setup_arch(char **cmdline_p) | |
144 | * with the current CR4 value. This may not be necessary, but | |
145 | * auditing all the early-boot CR4 manipulation would be needed to | |
146 | * rule it out. | |
147 | + * | |
148 | + * Mask off features that don't work outside long mode (just | |
149 | + * PCIDE for now). | |
150 | */ | |
151 | - mmu_cr4_features = __read_cr4(); | |
152 | + mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE; | |
153 | ||
154 | memblock_set_current_limit(get_max_mapped()); | |
155 | ||
156 | diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c | |
157 | index 893fd8c849e2..d05006f6c31c 100644 | |
158 | --- a/arch/x86/kernel/smpboot.c | |
159 | +++ b/arch/x86/kernel/smpboot.c | |
160 | @@ -227,10 +227,12 @@ static int enable_start_cpu0; | |
161 | static void notrace start_secondary(void *unused) | |
162 | { | |
163 | /* | |
164 | - * Don't put *anything* before cpu_init(), SMP booting is too | |
165 | - * fragile that we want to limit the things done here to the | |
166 | - * most necessary things. | |
167 | + * Don't put *anything* except direct CPU state initialization | |
168 | + * before cpu_init(), SMP booting is too fragile that we want to | |
169 | + * limit the things done here to the most necessary things. | |
170 | */ | |
171 | + if (boot_cpu_has(X86_FEATURE_PCID)) | |
172 | + __write_cr4(__read_cr4() | X86_CR4_PCIDE); | |
173 | cpu_init(); | |
174 | x86_cpuinit.early_percpu_clock_init(); | |
175 | preempt_disable(); | |
176 | diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c | |
177 | index bf3f1065d6ad..df2624b091a7 100644 | |
178 | --- a/arch/x86/mm/init.c | |
179 | +++ b/arch/x86/mm/init.c | |
180 | @@ -19,6 +19,7 @@ | |
181 | #include <asm/microcode.h> | |
182 | #include <asm/kaslr.h> | |
183 | #include <asm/hypervisor.h> | |
184 | +#include <asm/cpufeature.h> | |
185 | ||
186 | /* | |
187 | * We need to define the tracepoints somewhere, and tlb.c | |
188 | @@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void) | |
189 | } | |
190 | } | |
191 | ||
192 | +static void setup_pcid(void) | |
193 | +{ | |
194 | +#ifdef CONFIG_X86_64 | |
195 | + if (boot_cpu_has(X86_FEATURE_PCID)) { | |
196 | + if (boot_cpu_has(X86_FEATURE_PGE)) { | |
197 | + /* | |
198 | + * This can't be cr4_set_bits_and_update_boot() -- | |
199 | + * the trampoline code can't handle CR4.PCIDE and | |
200 | + * it wouldn't do any good anyway. Despite the name, | |
201 | + * cr4_set_bits_and_update_boot() doesn't actually | |
202 | + * cause the bits in question to remain set all the | |
203 | + * way through the secondary boot asm. | |
204 | + * | |
205 | + * Instead, we brute-force it and set CR4.PCIDE | |
206 | + * manually in start_secondary(). | |
207 | + */ | |
208 | + cr4_set_bits(X86_CR4_PCIDE); | |
209 | + } else { | |
210 | + /* | |
211 | + * flush_tlb_all(), as currently implemented, won't | |
212 | + * work if PCID is on but PGE is not. Since that | |
213 | + * combination doesn't exist on real hardware, there's | |
214 | + * no reason to try to fully support it, but it's | |
215 | + * polite to avoid corrupting data if we're on | |
216 | + * an improperly configured VM. | |
217 | + */ | |
218 | + setup_clear_cpu_cap(X86_FEATURE_PCID); | |
219 | + } | |
220 | + } | |
221 | +#endif | |
222 | +} | |
223 | + | |
224 | #ifdef CONFIG_X86_32 | |
225 | #define NR_RANGE_MR 3 | |
226 | #else /* CONFIG_X86_64 */ | |
227 | @@ -592,6 +625,7 @@ void __init init_mem_mapping(void) | |
228 | unsigned long end; | |
229 | ||
230 | probe_page_size_mask(); | |
231 | + setup_pcid(); | |
232 | ||
233 | #ifdef CONFIG_X86_64 | |
234 | end = max_pfn << PAGE_SHIFT; | |
235 | -- | |
236 | 2.14.2 | |
237 |