From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Mon, 4 Dec 2017 15:07:50 +0100
Subject: [PATCH] x86/events/intel/ds: Map debug buffers in cpu_entry_area
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

The BTS and PEBS buffers both have their virtual addresses programmed into
the hardware. This means that any access to them is performed via the page
tables. The times that the hardware accesses these are entirely dependent
on how the performance monitoring hardware events are set up. In other
words, there is no way for the kernel to tell when the hardware might
access these buffers.

To avoid perf crashes, place the 'debug_store' itself in the cpu_entry_area
and allocate the BTS and PEBS buffers as pages that are mapped into the
cpu_entry_area as well.
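
A condensed sketch of the resulting PEBS allocation flow, assembled from the
hunks below (the insn_buffer fixup path, error handling and the max/threshold
bookkeeping are trimmed; the helper name alloc_pebs_buffer_sketch is made up
for illustration, all other identifiers come straight from the diff):

  /* Sketch only: mirrors alloc_pebs_buffer() as changed below. */
  static int alloc_pebs_buffer_sketch(int cpu)
  {
  	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
  	struct debug_store *ds = hwev->ds;
  	size_t bsiz = x86_pmu.pebs_buffer_size;
  	void *buffer, *cea;

  	/* Backing pages come from the page allocator, not kmalloc. */
  	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
  	if (unlikely(!buffer))
  		return -ENOMEM;
  	hwev->ds_pebs_vaddr = buffer;	/* direct-map address, kept for freeing */

  	/* Point the hardware at the cpu_entry_area alias of those pages. */
  	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
  	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);	/* one PTE per page */
  	ds->pebs_buffer_base = (unsigned long)cea;
  	ds->pebs_index = ds->pebs_buffer_base;
  	return 0;
  }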

The PEBS fixup buffer does not need this treatment.

[ tglx: Got rid of the kaiser_add_mapping() complication ]

Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: keescook@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit c1961a4631daef4aeabee8e368b1b13e8f173c91)
Signed-off-by: Andy Whitcroft <apw@canonical.com>
Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
(cherry picked from commit 569dedbb62e16e3268f006dcf745b8d27690ef91)
Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
---
 arch/x86/events/perf_event.h |   2 +
 arch/x86/events/intel/ds.c   | 125 +++++++++++++++++++++++++++----------------
 2 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 308bc14f58af..eb0876475f18 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -199,6 +199,8 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 */
 	struct debug_store	*ds;
+	void			*ds_pebs_vaddr;
+	void			*ds_bts_vaddr;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_large_pebs;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 21a4ed789ec0..85df1f12c49e 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2,6 +2,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 
@@ -279,17 +280,52 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
-static int alloc_pebs_buffer(int cpu)
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	phys_addr_t pa;
+	size_t msz = 0;
+
+	pa = virt_to_phys(addr);
+	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, pa, prot);
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+	size_t msz = 0;
+
+	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
 	int node = cpu_to_node(cpu);
-	int max;
-	void *buffer, *ibuffer;
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
+static int alloc_pebs_buffer(int cpu)
+{
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int max, node = cpu_to_node(cpu);
+	void *buffer, *ibuffer, *cea;
 
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -300,25 +336,27 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
-
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds->pebs_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
-
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 	return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.pebs)
 		return;
@@ -326,73 +364,70 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
-	void *buffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *buffer, *cea;
+	int max;
 
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 	}
-
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds->bts_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
-
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 	return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds_clear_cea(cea, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
-
 	return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
-- 
2.14.2
