git.proxmox.com Git - pve-kernel.git/blob - patches/kernel/0211-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch
add objtool build fix
[pve-kernel.git] / patches / kernel / 0211-x86-mm-Use-INVPCID-for-__native_flush_tlb_single.patch
1 From 534c2338c3f72069910c06eba7693a4a1d15faf8 Mon Sep 17 00:00:00 2001
2 From: Dave Hansen <dave.hansen@linux.intel.com>
3 Date: Mon, 4 Dec 2017 15:08:01 +0100
4 Subject: [PATCH 211/233] x86/mm: Use INVPCID for __native_flush_tlb_single()
5 MIME-Version: 1.0
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
8
9 CVE-2017-5754
10
11 This uses INVPCID to shoot down individual lines of the user mapping
12 instead of marking the entire user map as invalid. This
13 could/might/possibly be faster.
14
15 This for sure needs tlb_single_page_flush_ceiling to be redetermined;
16 esp. since INVPCID is _slow_.
17
18 A detailed performance analysis is available here:
19
20 https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com
21
22 [ Peterz: Split out from big combo patch ]
23
24 Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
25 Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
26 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
27 Cc: Andy Lutomirski <luto@kernel.org>
28 Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
29 Cc: Borislav Petkov <bp@alien8.de>
30 Cc: Brian Gerst <brgerst@gmail.com>
31 Cc: Denys Vlasenko <dvlasenk@redhat.com>
32 Cc: Eduardo Valentin <eduval@amazon.com>
33 Cc: Greg KH <gregkh@linuxfoundation.org>
34 Cc: H. Peter Anvin <hpa@zytor.com>
35 Cc: Josh Poimboeuf <jpoimboe@redhat.com>
36 Cc: Juergen Gross <jgross@suse.com>
37 Cc: Linus Torvalds <torvalds@linux-foundation.org>
38 Cc: Peter Zijlstra <peterz@infradead.org>
39 Cc: Will Deacon <will.deacon@arm.com>
40 Cc: aliguori@amazon.com
41 Cc: daniel.gruss@iaik.tugraz.at
42 Cc: hughd@google.com
43 Cc: keescook@google.com
44 Signed-off-by: Ingo Molnar <mingo@kernel.org>
45 (cherry picked from commit 6cff64b86aaaa07f89f50498055a20e45754b0c1)
46 Signed-off-by: Andy Whitcroft <apw@canonical.com>
47 Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
48 (cherry picked from commit e4986a4e89c0eb40f824a8505feefff3328ad4b2)
49 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
50 ---
51 arch/x86/include/asm/cpufeatures.h | 1 +
52 arch/x86/include/asm/tlbflush.h | 23 +++++++++++++-
53 arch/x86/mm/init.c | 64 ++++++++++++++++++++++----------------
54 3 files changed, 60 insertions(+), 28 deletions(-)
55
56 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
57 index de4e91452de4..9b0c283afcf0 100644
58 --- a/arch/x86/include/asm/cpufeatures.h
59 +++ b/arch/x86/include/asm/cpufeatures.h
60 @@ -196,6 +196,7 @@
61 #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
62 #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
63 #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
64 +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
65
66 #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
67 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
68 diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
69 index 2b7b32c243f1..979e590648a5 100644
70 --- a/arch/x86/include/asm/tlbflush.h
71 +++ b/arch/x86/include/asm/tlbflush.h
72 @@ -84,6 +84,18 @@ static inline u16 kern_pcid(u16 asid)
73 return asid + 1;
74 }
75
76 +/*
77 + * The user PCID is just the kernel one, plus the "switch bit".
78 + */
79 +static inline u16 user_pcid(u16 asid)
80 +{
81 + u16 ret = kern_pcid(asid);
82 +#ifdef CONFIG_PAGE_TABLE_ISOLATION
83 + ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
84 +#endif
85 + return ret;
86 +}
87 +
88 struct pgd_t;
89 static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
90 {
91 @@ -324,6 +336,8 @@ static inline void __native_flush_tlb_global(void)
92 /*
93 * Using INVPCID is considerably faster than a pair of writes
94 * to CR4 sandwiched inside an IRQ flag save/restore.
95 + *
96 + * Note, this works with CR4.PCIDE=0 or 1.
97 */
98 invpcid_flush_all();
99 return;
100 @@ -357,7 +371,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
101 if (!static_cpu_has(X86_FEATURE_PTI))
102 return;
103
104 - invalidate_user_asid(loaded_mm_asid);
105 + /*
106 + * Some platforms #GP if we call invpcid(type=0/1) before CR4.PCIDE=1.
107 + * Just use invalidate_user_asid() in case we are called early.
108 + */
109 + if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
110 + invalidate_user_asid(loaded_mm_asid);
111 + else
112 + invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
113 }
114
115 /*
116 diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
117 index caeb8a7bf0a4..80259ad8c386 100644
118 --- a/arch/x86/mm/init.c
119 +++ b/arch/x86/mm/init.c
120 @@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void)
121
122 static void setup_pcid(void)
123 {
124 -#ifdef CONFIG_X86_64
125 - if (boot_cpu_has(X86_FEATURE_PCID)) {
126 - if (boot_cpu_has(X86_FEATURE_PGE)) {
127 - /*
128 - * This can't be cr4_set_bits_and_update_boot() --
129 - * the trampoline code can't handle CR4.PCIDE and
130 - * it wouldn't do any good anyway. Despite the name,
131 - * cr4_set_bits_and_update_boot() doesn't actually
132 - * cause the bits in question to remain set all the
133 - * way through the secondary boot asm.
134 - *
135 - * Instead, we brute-force it and set CR4.PCIDE
136 - * manually in start_secondary().
137 - */
138 - cr4_set_bits(X86_CR4_PCIDE);
139 - } else {
140 - /*
141 - * flush_tlb_all(), as currently implemented, won't
142 - * work if PCID is on but PGE is not. Since that
143 - * combination doesn't exist on real hardware, there's
144 - * no reason to try to fully support it, but it's
145 - * polite to avoid corrupting data if we're on
146 - * an improperly configured VM.
147 - */
148 - setup_clear_cpu_cap(X86_FEATURE_PCID);
149 - }
150 + if (!IS_ENABLED(CONFIG_X86_64))
151 + return;
152 +
153 + if (!boot_cpu_has(X86_FEATURE_PCID))
154 + return;
155 +
156 + if (boot_cpu_has(X86_FEATURE_PGE)) {
157 + /*
158 + * This can't be cr4_set_bits_and_update_boot() -- the
159 + * trampoline code can't handle CR4.PCIDE and it wouldn't
160 + * do any good anyway. Despite the name,
161 + * cr4_set_bits_and_update_boot() doesn't actually cause
162 + * the bits in question to remain set all the way through
163 + * the secondary boot asm.
164 + *
165 + * Instead, we brute-force it and set CR4.PCIDE manually in
166 + * start_secondary().
167 + */
168 + cr4_set_bits(X86_CR4_PCIDE);
169 +
170 + /*
171 + * INVPCID's single-context modes (0/1) only work if we set
172 + * X86_CR4_PCIDE, *and* we have INVPCID support. It's unusable
173 + * on systems that have X86_CR4_PCIDE clear, or that have
174 + * no INVPCID support at all.
175 + */
176 + if (boot_cpu_has(X86_FEATURE_INVPCID))
177 + setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
178 + } else {
179 + /*
180 + * flush_tlb_all(), as currently implemented, won't work if
181 + * PCID is on but PGE is not. Since that combination
182 + * doesn't exist on real hardware, there's no reason to try
183 + * to fully support it, but it's polite to avoid corrupting
184 + * data if we're on an improperly configured VM.
185 + */
186 + setup_clear_cpu_cap(X86_FEATURE_PCID);
187 }
188 -#endif
189 }
190
191 #ifdef CONFIG_X86_32
192 --
193 2.14.2
194