]> git.proxmox.com Git - pve-kernel.git/blob - patches/kernel/0032-x86-mm-64-Initialize-CR4.PCIDE-early.patch
revert buggy SCSI error handler commit
[pve-kernel.git] / patches / kernel / 0032-x86-mm-64-Initialize-CR4.PCIDE-early.patch
1 From d9fd6653e5dd9d80c7c75065329250529281e02d Mon Sep 17 00:00:00 2001
2 From: Andy Lutomirski <luto@kernel.org>
3 Date: Sun, 10 Sep 2017 17:48:27 -0700
4 Subject: [PATCH 032/242] x86/mm/64: Initialize CR4.PCIDE early
5 MIME-Version: 1.0
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
8
9 CVE-2017-5754
10
11 cpu_init() is weird: it's called rather late (after early
12 identification and after most MMU state is initialized) on the boot
13 CPU but is called extremely early (before identification) on secondary
14 CPUs. It's called just late enough on the boot CPU that its CR4 value
15 isn't propagated to mmu_cr4_features.
16
17 Even if we put CR4.PCIDE into mmu_cr4_features, we'd hit two
18 problems. First, we'd crash in the trampoline code. That's
19 fixable, and I tried that. It turns out that mmu_cr4_features is
20 totally ignored by secondary_start_64(), though, so even with the
21 trampoline code fixed, it wouldn't help.
22
23 This means that we don't currently have CR4.PCIDE reliably initialized
24 before we start playing with cpu_tlbstate. This is very fragile and
25 tends to cause boot failures if I make even small changes to the TLB
26 handling code.
27
28 Make it more robust: initialize CR4.PCIDE earlier on the boot CPU
29 and propagate it to secondary CPUs in start_secondary().
30
31 ( Yes, this is ugly. I think we should have improved mmu_cr4_features
32 to actually control CR4 during secondary bootup, but that would be
33 fairly intrusive at this stage. )
34
35 Signed-off-by: Andy Lutomirski <luto@kernel.org>
36 Reported-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
37 Tested-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
38 Cc: Borislav Petkov <bpetkov@suse.de>
39 Cc: Linus Torvalds <torvalds@linux-foundation.org>
40 Cc: Peter Zijlstra <peterz@infradead.org>
41 Cc: Thomas Gleixner <tglx@linutronix.de>
42 Cc: linux-kernel@vger.kernel.org
43 Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
44 Signed-off-by: Ingo Molnar <mingo@kernel.org>
45 (cherry picked from commit c7ad5ad297e644601747d6dbee978bf85e14f7bc)
46 Signed-off-by: Andy Whitcroft <apw@canonical.com>
47 Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
48 (cherry picked from commit 0e6a37a43aa876327e7d21881c09977da2d5c270)
49 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
50 ---
51 arch/x86/kernel/cpu/common.c | 49 +++++++-------------------------------------
52 arch/x86/kernel/setup.c | 5 ++++-
53 arch/x86/kernel/smpboot.c | 8 +++++---
54 arch/x86/mm/init.c | 34 ++++++++++++++++++++++++++++++
55 4 files changed, 50 insertions(+), 46 deletions(-)
56
57 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
58 index 0b80ed14ff52..4be7b209a3d6 100644
59 --- a/arch/x86/kernel/cpu/common.c
60 +++ b/arch/x86/kernel/cpu/common.c
61 @@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
62 __setup("nompx", x86_mpx_setup);
63
64 #ifdef CONFIG_X86_64
65 -static int __init x86_pcid_setup(char *s)
66 +static int __init x86_nopcid_setup(char *s)
67 {
68 - /* require an exact match without trailing characters */
69 - if (strlen(s))
70 - return 0;
71 + /* nopcid doesn't accept parameters */
72 + if (s)
73 + return -EINVAL;
74
75 /* do not emit a message if the feature is not present */
76 if (!boot_cpu_has(X86_FEATURE_PCID))
77 - return 1;
78 + return 0;
79
80 setup_clear_cpu_cap(X86_FEATURE_PCID);
81 pr_info("nopcid: PCID feature disabled\n");
82 - return 1;
83 + return 0;
84 }
85 -__setup("nopcid", x86_pcid_setup);
86 +early_param("nopcid", x86_nopcid_setup);
87 #endif
88
89 static int __init x86_noinvpcid_setup(char *s)
90 @@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
91 }
92 }
93
94 -static void setup_pcid(struct cpuinfo_x86 *c)
95 -{
96 - if (cpu_has(c, X86_FEATURE_PCID)) {
97 - if (cpu_has(c, X86_FEATURE_PGE)) {
98 - /*
99 - * We'd like to use cr4_set_bits_and_update_boot(),
100 - * but we can't. CR4.PCIDE is special and can only
101 - * be set in long mode, and the early CPU init code
102 - * doesn't know this and would try to restore CR4.PCIDE
103 - * prior to entering long mode.
104 - *
105 - * Instead, we rely on the fact that hotplug, resume,
106 - * etc all fully restore CR4 before they write anything
107 - * that could have nonzero PCID bits to CR3. CR4.PCIDE
108 - * has no effect on the page tables themselves, so we
109 - * don't need it to be restored early.
110 - */
111 - cr4_set_bits(X86_CR4_PCIDE);
112 - } else {
113 - /*
114 - * flush_tlb_all(), as currently implemented, won't
115 - * work if PCID is on but PGE is not. Since that
116 - * combination doesn't exist on real hardware, there's
117 - * no reason to try to fully support it, but it's
118 - * polite to avoid corrupting data if we're on
119 - * an improperly configured VM.
120 - */
121 - clear_cpu_cap(c, X86_FEATURE_PCID);
122 - }
123 - }
124 -}
125 -
126 /*
127 * Protection Keys are not available in 32-bit mode.
128 */
129 @@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
130 setup_smep(c);
131 setup_smap(c);
132
133 - /* Set up PCID */
134 - setup_pcid(c);
135 -
136 /*
137 * The vendor-specific functions might have changed features.
138 * Now we do "generic changes."
139 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
140 index d7e8b983aa72..f964bfddfefd 100644
141 --- a/arch/x86/kernel/setup.c
142 +++ b/arch/x86/kernel/setup.c
143 @@ -1174,8 +1174,11 @@ void __init setup_arch(char **cmdline_p)
144 * with the current CR4 value. This may not be necessary, but
145 * auditing all the early-boot CR4 manipulation would be needed to
146 * rule it out.
147 + *
148 + * Mask off features that don't work outside long mode (just
149 + * PCIDE for now).
150 */
151 - mmu_cr4_features = __read_cr4();
152 + mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
153
154 memblock_set_current_limit(get_max_mapped());
155
156 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
157 index 893fd8c849e2..d05006f6c31c 100644
158 --- a/arch/x86/kernel/smpboot.c
159 +++ b/arch/x86/kernel/smpboot.c
160 @@ -227,10 +227,12 @@ static int enable_start_cpu0;
161 static void notrace start_secondary(void *unused)
162 {
163 /*
164 - * Don't put *anything* before cpu_init(), SMP booting is too
165 - * fragile that we want to limit the things done here to the
166 - * most necessary things.
167 + * Don't put *anything* except direct CPU state initialization
168 + * before cpu_init(), SMP booting is too fragile that we want to
169 + * limit the things done here to the most necessary things.
170 */
171 + if (boot_cpu_has(X86_FEATURE_PCID))
172 + __write_cr4(__read_cr4() | X86_CR4_PCIDE);
173 cpu_init();
174 x86_cpuinit.early_percpu_clock_init();
175 preempt_disable();
176 diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
177 index bf3f1065d6ad..df2624b091a7 100644
178 --- a/arch/x86/mm/init.c
179 +++ b/arch/x86/mm/init.c
180 @@ -19,6 +19,7 @@
181 #include <asm/microcode.h>
182 #include <asm/kaslr.h>
183 #include <asm/hypervisor.h>
184 +#include <asm/cpufeature.h>
185
186 /*
187 * We need to define the tracepoints somewhere, and tlb.c
188 @@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
189 }
190 }
191
192 +static void setup_pcid(void)
193 +{
194 +#ifdef CONFIG_X86_64
195 + if (boot_cpu_has(X86_FEATURE_PCID)) {
196 + if (boot_cpu_has(X86_FEATURE_PGE)) {
197 + /*
198 + * This can't be cr4_set_bits_and_update_boot() --
199 + * the trampoline code can't handle CR4.PCIDE and
200 + * it wouldn't do any good anyway. Despite the name,
201 + * cr4_set_bits_and_update_boot() doesn't actually
202 + * cause the bits in question to remain set all the
203 + * way through the secondary boot asm.
204 + *
205 + * Instead, we brute-force it and set CR4.PCIDE
206 + * manually in start_secondary().
207 + */
208 + cr4_set_bits(X86_CR4_PCIDE);
209 + } else {
210 + /*
211 + * flush_tlb_all(), as currently implemented, won't
212 + * work if PCID is on but PGE is not. Since that
213 + * combination doesn't exist on real hardware, there's
214 + * no reason to try to fully support it, but it's
215 + * polite to avoid corrupting data if we're on
216 + * an improperly configured VM.
217 + */
218 + setup_clear_cpu_cap(X86_FEATURE_PCID);
219 + }
220 + }
221 +#endif
222 +}
223 +
224 #ifdef CONFIG_X86_32
225 #define NR_RANGE_MR 3
226 #else /* CONFIG_X86_64 */
227 @@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
228 unsigned long end;
229
230 probe_page_size_mask();
231 + setup_pcid();
232
233 #ifdef CONFIG_X86_64
234 end = max_pfn << PAGE_SHIFT;
235 --
236 2.14.2
237