1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Hugh Dickins <hughd@google.com>
3 Date: Mon, 4 Dec 2017 15:07:50 +0100
4 Subject: [PATCH] x86/events/intel/ds: Map debug buffers in cpu_entry_area
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
11 The BTS and PEBS buffers both have their virtual addresses programmed into
12 the hardware. This means that any access to them is performed via the page
13 tables. The times that the hardware accesses these are entirely dependent
14 on how the performance monitoring hardware events are set up. In other
15 words, there is no way for the kernel to tell when the hardware might
16 access these buffers.
18 To avoid perf crashes, place 'debug_store' allocate pages and map them into
19 the cpu_entry_area.
21 The PEBS fixup buffer does not need this treatment.
23 [ tglx: Got rid of the kaiser_add_mapping() complication ]
25 Signed-off-by: Hugh Dickins <hughd@google.com>
26 Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
27 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
28 Cc: Andy Lutomirski <luto@kernel.org>
29 Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
30 Cc: Borislav Petkov <bp@alien8.de>
31 Cc: Brian Gerst <brgerst@gmail.com>
32 Cc: David Laight <David.Laight@aculab.com>
33 Cc: Denys Vlasenko <dvlasenk@redhat.com>
34 Cc: Eduardo Valentin <eduval@amazon.com>
35 Cc: Greg KH <gregkh@linuxfoundation.org>
36 Cc: H. Peter Anvin <hpa@zytor.com>
37 Cc: Josh Poimboeuf <jpoimboe@redhat.com>
38 Cc: Juergen Gross <jgross@suse.com>
39 Cc: Linus Torvalds <torvalds@linux-foundation.org>
40 Cc: Peter Zijlstra <peterz@infradead.org>
41 Cc: Will Deacon <will.deacon@arm.com>
42 Cc: aliguori@amazon.com
43 Cc: daniel.gruss@iaik.tugraz.at
44 Cc: keescook@google.com
45 Signed-off-by: Ingo Molnar <mingo@kernel.org>
46 (cherry picked from commit c1961a4631daef4aeabee8e368b1b13e8f173c91)
47 Signed-off-by: Andy Whitcroft <apw@canonical.com>
48 Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
49 (cherry picked from commit 569dedbb62e16e3268f006dcf745b8d27690ef91)
50 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
52 arch/x86/events/perf_event.h | 2 +
53 arch/x86/events/intel/ds.c | 125 +++++++++++++++++++++++++++----------------
54 2 files changed, 82 insertions(+), 45 deletions(-)
56 diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
57 index 308bc14f58af..eb0876475f18 100644
58 --- a/arch/x86/events/perf_event.h
59 +++ b/arch/x86/events/perf_event.h
60 @@ -199,6 +199,8 @@ struct cpu_hw_events {
61 * Intel DebugStore bits
63 struct debug_store *ds;
64 + void *ds_pebs_vaddr;
69 diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
70 index 21a4ed789ec0..85df1f12c49e 100644
71 --- a/arch/x86/events/intel/ds.c
72 +++ b/arch/x86/events/intel/ds.c
74 #include <linux/types.h>
75 #include <linux/slab.h>
77 +#include <asm/cpu_entry_area.h>
78 #include <asm/perf_event.h>
81 @@ -279,17 +280,52 @@ void fini_debug_store_on_cpu(int cpu)
83 static DEFINE_PER_CPU(void *, insn_buffer);
85 -static int alloc_pebs_buffer(int cpu)
86 +static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
88 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
92 + pa = virt_to_phys(addr);
93 + for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
94 + cea_set_pte(cea, pa, prot);
97 +static void ds_clear_cea(void *cea, size_t size)
101 + for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
102 + cea_set_pte(cea, 0, PAGE_NONE);
105 +static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
107 + unsigned int order = get_order(size);
108 int node = cpu_to_node(cpu);
110 - void *buffer, *ibuffer;
113 + page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
114 + return page ? page_address(page) : NULL;
117 +static void dsfree_pages(const void *buffer, size_t size)
120 + free_pages((unsigned long)buffer, get_order(size));
123 +static int alloc_pebs_buffer(int cpu)
125 + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
126 + struct debug_store *ds = hwev->ds;
127 + size_t bsiz = x86_pmu.pebs_buffer_size;
128 + int max, node = cpu_to_node(cpu);
129 + void *buffer, *ibuffer, *cea;
134 - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
135 + buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
136 if (unlikely(!buffer))
139 @@ -300,25 +336,27 @@ static int alloc_pebs_buffer(int cpu)
140 if (x86_pmu.intel_cap.pebs_format < 2) {
141 ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
144 + dsfree_pages(buffer, bsiz);
147 per_cpu(insn_buffer, cpu) = ibuffer;
150 - max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
152 - ds->pebs_buffer_base = (u64)(unsigned long)buffer;
153 + hwev->ds_pebs_vaddr = buffer;
154 + /* Update the cpu entry area mapping */
155 + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
156 + ds->pebs_buffer_base = (unsigned long) cea;
157 + ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
158 ds->pebs_index = ds->pebs_buffer_base;
159 - ds->pebs_absolute_maximum = ds->pebs_buffer_base +
160 - max * x86_pmu.pebs_record_size;
162 + max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
163 + ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
167 static void release_pebs_buffer(int cpu)
169 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
170 + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
171 + struct debug_store *ds = hwev->ds;
174 if (!ds || !x86_pmu.pebs)
176 @@ -326,73 +364,70 @@ static void release_pebs_buffer(int cpu)
177 kfree(per_cpu(insn_buffer, cpu));
178 per_cpu(insn_buffer, cpu) = NULL;
180 - kfree((void *)(unsigned long)ds->pebs_buffer_base);
181 + /* Clear the fixmap */
182 + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
183 + ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
184 ds->pebs_buffer_base = 0;
185 + dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
186 + hwev->ds_pebs_vaddr = NULL;
189 static int alloc_bts_buffer(int cpu)
191 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
192 - int node = cpu_to_node(cpu);
195 + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
196 + struct debug_store *ds = hwev->ds;
197 + void *buffer, *cea;
203 - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
204 + buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
205 if (unlikely(!buffer)) {
206 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
210 - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
213 - ds->bts_buffer_base = (u64)(unsigned long)buffer;
214 + hwev->ds_bts_vaddr = buffer;
215 + /* Update the fixmap */
216 + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
217 + ds->bts_buffer_base = (unsigned long) cea;
218 + ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
219 ds->bts_index = ds->bts_buffer_base;
220 - ds->bts_absolute_maximum = ds->bts_buffer_base +
221 - max * BTS_RECORD_SIZE;
222 - ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
223 - thresh * BTS_RECORD_SIZE;
225 + max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
226 + ds->bts_absolute_maximum = ds->bts_buffer_base + max;
227 + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
231 static void release_bts_buffer(int cpu)
233 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
234 + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
235 + struct debug_store *ds = hwev->ds;
238 if (!ds || !x86_pmu.bts)
241 - kfree((void *)(unsigned long)ds->bts_buffer_base);
242 + /* Clear the fixmap */
243 + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
244 + ds_clear_cea(cea, BTS_BUFFER_SIZE);
245 ds->bts_buffer_base = 0;
246 + dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
247 + hwev->ds_bts_vaddr = NULL;
250 static int alloc_ds_buffer(int cpu)
252 - int node = cpu_to_node(cpu);
253 - struct debug_store *ds;
255 - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
258 + struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
260 + memset(ds, 0, sizeof(*ds));
261 per_cpu(cpu_hw_events, cpu).ds = ds;
266 static void release_ds_buffer(int cpu)
268 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
273 per_cpu(cpu_hw_events, cpu).ds = NULL;
277 void release_ds_buffers(void)