]>
Commit | Line | Data |
---|---|---|
321d628a FG |
1 | From 534c2338c3f72069910c06eba7693a4a1d15faf8 Mon Sep 17 00:00:00 2001 |
2 | From: Dave Hansen <dave.hansen@linux.intel.com> | |
3 | Date: Mon, 4 Dec 2017 15:08:01 +0100 | |
633c5ed1 | 4 | Subject: [PATCH 211/242] x86/mm: Use INVPCID for __native_flush_tlb_single() |
321d628a FG |
5 | MIME-Version: 1.0 |
6 | Content-Type: text/plain; charset=UTF-8 | |
7 | Content-Transfer-Encoding: 8bit | |
8 | ||
9 | CVE-2017-5754 | |
10 | ||
11 | This uses INVPCID to shoot down individual lines of the user mapping | |
12 | instead of marking the entire user map as invalid. This | |
13 | could/might/possibly be faster. | |
14 | ||
15 | This for sure needs tlb_single_page_flush_ceiling to be redetermined; | |
16 | esp. since INVPCID is _slow_. | |
17 | ||
18 | A detailed performance analysis is available here: | |
19 | ||
20 | https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com | |
21 | ||
22 | [ Peterz: Split out from big combo patch ] | |
23 | ||
24 | Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> | |
25 | Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> | |
26 | Signed-off-by: Thomas Gleixner <tglx@linutronix.de> | |
27 | Cc: Andy Lutomirski <luto@kernel.org> | |
28 | Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> | |
29 | Cc: Borislav Petkov <bp@alien8.de> | |
30 | Cc: Brian Gerst <brgerst@gmail.com> | |
31 | Cc: Denys Vlasenko <dvlasenk@redhat.com> | |
32 | Cc: Eduardo Valentin <eduval@amazon.com> | |
33 | Cc: Greg KH <gregkh@linuxfoundation.org> | |
34 | Cc: H. Peter Anvin <hpa@zytor.com> | |
35 | Cc: Josh Poimboeuf <jpoimboe@redhat.com> | |
36 | Cc: Juergen Gross <jgross@suse.com> | |
37 | Cc: Linus Torvalds <torvalds@linux-foundation.org> | |
38 | Cc: Peter Zijlstra <peterz@infradead.org> | |
39 | Cc: Will Deacon <will.deacon@arm.com> | |
40 | Cc: aliguori@amazon.com | |
41 | Cc: daniel.gruss@iaik.tugraz.at | |
42 | Cc: hughd@google.com | |
43 | Cc: keescook@google.com | |
44 | Signed-off-by: Ingo Molnar <mingo@kernel.org> | |
45 | (cherry picked from commit 6cff64b86aaaa07f89f50498055a20e45754b0c1) | |
46 | Signed-off-by: Andy Whitcroft <apw@canonical.com> | |
47 | Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> | |
48 | (cherry picked from commit e4986a4e89c0eb40f824a8505feefff3328ad4b2) | |
49 | Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> | |
50 | --- | |
51 | arch/x86/include/asm/cpufeatures.h | 1 + | |
52 | arch/x86/include/asm/tlbflush.h | 23 +++++++++++++- | |
53 | arch/x86/mm/init.c | 64 ++++++++++++++++++++++---------------- | |
54 | 3 files changed, 60 insertions(+), 28 deletions(-) | |
55 | ||
56 | diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h | |
57 | index de4e91452de4..9b0c283afcf0 100644 | |
58 | --- a/arch/x86/include/asm/cpufeatures.h | |
59 | +++ b/arch/x86/include/asm/cpufeatures.h | |
60 | @@ -196,6 +196,7 @@ | |
61 | #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ | |
62 | #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ | |
63 | #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ | |
64 | +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ | |
65 | ||
66 | #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ | |
67 | #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ | |
68 | diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h | |
69 | index 2b7b32c243f1..979e590648a5 100644 | |
70 | --- a/arch/x86/include/asm/tlbflush.h | |
71 | +++ b/arch/x86/include/asm/tlbflush.h | |
72 | @@ -84,6 +84,18 @@ static inline u16 kern_pcid(u16 asid) | |
73 | return asid + 1; | |
74 | } | |
75 | ||
76 | +/* | |
77 | + * The user PCID is just the kernel one, plus the "switch bit". | |
78 | + */ | |
79 | +static inline u16 user_pcid(u16 asid) | |
80 | +{ | |
81 | + u16 ret = kern_pcid(asid); | |
82 | +#ifdef CONFIG_PAGE_TABLE_ISOLATION | |
83 | + ret |= 1 << X86_CR3_PTI_SWITCH_BIT; | |
84 | +#endif | |
85 | + return ret; | |
86 | +} | |
87 | + | |
88 | struct pgd_t; | |
89 | static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) | |
90 | { | |
91 | @@ -324,6 +336,8 @@ static inline void __native_flush_tlb_global(void) | |
92 | /* | |
93 | * Using INVPCID is considerably faster than a pair of writes | |
94 | * to CR4 sandwiched inside an IRQ flag save/restore. | |
95 | + * | |
96 | + * Note, this works with CR4.PCIDE=0 or 1. | |
97 | */ | |
98 | invpcid_flush_all(); | |
99 | return; | |
100 | @@ -357,7 +371,14 @@ static inline void __native_flush_tlb_single(unsigned long addr) | |
101 | if (!static_cpu_has(X86_FEATURE_PTI)) | |
102 | return; | |
103 | ||
104 | - invalidate_user_asid(loaded_mm_asid); | |
105 | + /* | |
106 | + * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1. | |
107 | + * Just use invalidate_user_asid() in case we are called early. | |
108 | + */ | |
109 | + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) | |
110 | + invalidate_user_asid(loaded_mm_asid); | |
111 | + else | |
112 | + invpcid_flush_one(user_pcid(loaded_mm_asid), addr); | |
113 | } | |
114 | ||
115 | /* | |
116 | diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c | |
117 | index caeb8a7bf0a4..80259ad8c386 100644 | |
118 | --- a/arch/x86/mm/init.c | |
119 | +++ b/arch/x86/mm/init.c | |
120 | @@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void) | |
121 | ||
122 | static void setup_pcid(void) | |
123 | { | |
124 | -#ifdef CONFIG_X86_64 | |
125 | - if (boot_cpu_has(X86_FEATURE_PCID)) { | |
126 | - if (boot_cpu_has(X86_FEATURE_PGE)) { | |
127 | - /* | |
128 | - * This can't be cr4_set_bits_and_update_boot() -- | |
129 | - * the trampoline code can't handle CR4.PCIDE and | |
130 | - * it wouldn't do any good anyway. Despite the name, | |
131 | - * cr4_set_bits_and_update_boot() doesn't actually | |
132 | - * cause the bits in question to remain set all the | |
133 | - * way through the secondary boot asm. | |
134 | - * | |
135 | - * Instead, we brute-force it and set CR4.PCIDE | |
136 | - * manually in start_secondary(). | |
137 | - */ | |
138 | - cr4_set_bits(X86_CR4_PCIDE); | |
139 | - } else { | |
140 | - /* | |
141 | - * flush_tlb_all(), as currently implemented, won't | |
142 | - * work if PCID is on but PGE is not. Since that | |
143 | - * combination doesn't exist on real hardware, there's | |
144 | - * no reason to try to fully support it, but it's | |
145 | - * polite to avoid corrupting data if we're on | |
146 | - * an improperly configured VM. | |
147 | - */ | |
148 | - setup_clear_cpu_cap(X86_FEATURE_PCID); | |
149 | - } | |
150 | + if (!IS_ENABLED(CONFIG_X86_64)) | |
151 | + return; | |
152 | + | |
153 | + if (!boot_cpu_has(X86_FEATURE_PCID)) | |
154 | + return; | |
155 | + | |
156 | + if (boot_cpu_has(X86_FEATURE_PGE)) { | |
157 | + /* | |
158 | + * This can't be cr4_set_bits_and_update_boot() -- the | |
159 | + * trampoline code can't handle CR4.PCIDE and it wouldn't | |
160 | + * do any good anyway. Despite the name, | |
161 | + * cr4_set_bits_and_update_boot() doesn't actually cause | |
162 | + * the bits in question to remain set all the way through | |
163 | + * the secondary boot asm. | |
164 | + * | |
165 | + * Instead, we brute-force it and set CR4.PCIDE manually in | |
166 | + * start_secondary(). | |
167 | + */ | |
168 | + cr4_set_bits(X86_CR4_PCIDE); | |
169 | + | |
170 | + /* | |
171 | + * INVPCID's single-context modes (2/3) only work if we set | |
172 | + * X86_CR4_PCIDE, *and* we have INVPCID support. It's unusable | |
173 | + * on systems that have X86_CR4_PCIDE clear, or that have | |
174 | + * no INVPCID support at all. | |
175 | + */ | |
176 | + if (boot_cpu_has(X86_FEATURE_INVPCID)) | |
177 | + setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE); | |
178 | + } else { | |
179 | + /* | |
180 | + * flush_tlb_all(), as currently implemented, won't work if | |
181 | + * PCID is on but PGE is not. Since that combination | |
182 | + * doesn't exist on real hardware, there's no reason to try | |
183 | + * to fully support it, but it's polite to avoid corrupting | |
184 | + * data if we're on an improperly configured VM. | |
185 | + */ | |
186 | + setup_clear_cpu_cap(X86_FEATURE_PCID); | |
187 | } | |
188 | -#endif | |
189 | } | |
190 | ||
191 | #ifdef CONFIG_X86_32 | |
192 | -- | |
193 | 2.14.2 | |
194 |