1 From 68338a3b7267b4fc346630b2d82a3599b5fbf54e Mon Sep 17 00:00:00 2001
2 From: Hugh Dickins <hughd@google.com>
3 Date: Mon, 4 Dec 2017 15:07:50 +0100
4 Subject: [PATCH 203/233] x86/events/intel/ds: Map debug buffers in cpu_entry_area
7 Content-Type: text/plain; charset=UTF-8
8 Content-Transfer-Encoding: 8bit
12 The BTS and PEBS buffers both have their virtual addresses programmed into
13 the hardware. This means that any access to them is performed via the page
14 tables. The times that the hardware accesses these are entirely dependent
15 on how the performance monitoring hardware events are set up. In other
16 words, there is no way for the kernel to tell when the hardware might
19 To avoid perf crashes, allocate the 'debug_store' pages and map them into
22 The PEBS fixup buffer does not need this treatment.
24 [ tglx: Got rid of the kaiser_add_mapping() complication ]
26 Signed-off-by: Hugh Dickins <hughd@google.com>
27 Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
28 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
29 Cc: Andy Lutomirski <luto@kernel.org>
30 Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
31 Cc: Borislav Petkov <bp@alien8.de>
32 Cc: Brian Gerst <brgerst@gmail.com>
33 Cc: David Laight <David.Laight@aculab.com>
34 Cc: Denys Vlasenko <dvlasenk@redhat.com>
35 Cc: Eduardo Valentin <eduval@amazon.com>
36 Cc: Greg KH <gregkh@linuxfoundation.org>
37 Cc: H. Peter Anvin <hpa@zytor.com>
38 Cc: Josh Poimboeuf <jpoimboe@redhat.com>
39 Cc: Juergen Gross <jgross@suse.com>
40 Cc: Linus Torvalds <torvalds@linux-foundation.org>
41 Cc: Peter Zijlstra <peterz@infradead.org>
42 Cc: Will Deacon <will.deacon@arm.com>
43 Cc: aliguori@amazon.com
44 Cc: daniel.gruss@iaik.tugraz.at
45 Cc: keescook@google.com
46 Signed-off-by: Ingo Molnar <mingo@kernel.org>
47 (cherry picked from commit c1961a4631daef4aeabee8e368b1b13e8f173c91)
48 Signed-off-by: Andy Whitcroft <apw@canonical.com>
49 Signed-off-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
50 (cherry picked from commit 569dedbb62e16e3268f006dcf745b8d27690ef91)
51 Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
53 arch/x86/events/perf_event.h | 2 +
54 arch/x86/events/intel/ds.c | 125 +++++++++++++++++++++++++++----------------
55 2 files changed, 82 insertions(+), 45 deletions(-)
57 diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
58 index 308bc14f58af..eb0876475f18 100644
59 --- a/arch/x86/events/perf_event.h
60 +++ b/arch/x86/events/perf_event.h
61 @@ -199,6 +199,8 @@ struct cpu_hw_events {
62 * Intel DebugStore bits
64 struct debug_store *ds;
65 + void *ds_pebs_vaddr;
70 diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
71 index 21a4ed789ec0..85df1f12c49e 100644
72 --- a/arch/x86/events/intel/ds.c
73 +++ b/arch/x86/events/intel/ds.c
75 #include <linux/types.h>
76 #include <linux/slab.h>
78 +#include <asm/cpu_entry_area.h>
79 #include <asm/perf_event.h>
82 @@ -279,17 +280,52 @@ void fini_debug_store_on_cpu(int cpu)
84 static DEFINE_PER_CPU(void *, insn_buffer);
86 -static int alloc_pebs_buffer(int cpu)
87 +static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
89 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
93 + pa = virt_to_phys(addr);
94 + for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
95 + cea_set_pte(cea, pa, prot);
98 +static void ds_clear_cea(void *cea, size_t size)
102 + for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
103 + cea_set_pte(cea, 0, PAGE_NONE);
106 +static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
108 + unsigned int order = get_order(size);
109 int node = cpu_to_node(cpu);
111 - void *buffer, *ibuffer;
114 + page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
115 + return page ? page_address(page) : NULL;
118 +static void dsfree_pages(const void *buffer, size_t size)
121 + free_pages((unsigned long)buffer, get_order(size));
124 +static int alloc_pebs_buffer(int cpu)
126 + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
127 + struct debug_store *ds = hwev->ds;
128 + size_t bsiz = x86_pmu.pebs_buffer_size;
129 + int max, node = cpu_to_node(cpu);
130 + void *buffer, *ibuffer, *cea;
135 - buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
136 + buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
137 if (unlikely(!buffer))
140 @@ -300,25 +336,27 @@ static int alloc_pebs_buffer(int cpu)
141 if (x86_pmu.intel_cap.pebs_format < 2) {
142 ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
145 + dsfree_pages(buffer, bsiz);
148 per_cpu(insn_buffer, cpu) = ibuffer;
151 - max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
153 - ds->pebs_buffer_base = (u64)(unsigned long)buffer;
154 + hwev->ds_pebs_vaddr = buffer;
155 + /* Update the cpu entry area mapping */
156 + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
157 + ds->pebs_buffer_base = (unsigned long) cea;
158 + ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
159 ds->pebs_index = ds->pebs_buffer_base;
160 - ds->pebs_absolute_maximum = ds->pebs_buffer_base +
161 - max * x86_pmu.pebs_record_size;
163 + max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
164 + ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
168 static void release_pebs_buffer(int cpu)
170 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
171 + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
172 + struct debug_store *ds = hwev->ds;
175 if (!ds || !x86_pmu.pebs)
177 @@ -326,73 +364,70 @@ static void release_pebs_buffer(int cpu)
178 kfree(per_cpu(insn_buffer, cpu));
179 per_cpu(insn_buffer, cpu) = NULL;
181 - kfree((void *)(unsigned long)ds->pebs_buffer_base);
182 + /* Clear the fixmap */
183 + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
184 + ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
185 ds->pebs_buffer_base = 0;
186 + dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
187 + hwev->ds_pebs_vaddr = NULL;
190 static int alloc_bts_buffer(int cpu)
192 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
193 - int node = cpu_to_node(cpu);
196 + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
197 + struct debug_store *ds = hwev->ds;
198 + void *buffer, *cea;
204 - buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
205 + buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
206 if (unlikely(!buffer)) {
207 WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
211 - max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
214 - ds->bts_buffer_base = (u64)(unsigned long)buffer;
215 + hwev->ds_bts_vaddr = buffer;
216 + /* Update the fixmap */
217 + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
218 + ds->bts_buffer_base = (unsigned long) cea;
219 + ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
220 ds->bts_index = ds->bts_buffer_base;
221 - ds->bts_absolute_maximum = ds->bts_buffer_base +
222 - max * BTS_RECORD_SIZE;
223 - ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
224 - thresh * BTS_RECORD_SIZE;
226 + max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
227 + ds->bts_absolute_maximum = ds->bts_buffer_base + max;
228 + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
232 static void release_bts_buffer(int cpu)
234 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
235 + struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
236 + struct debug_store *ds = hwev->ds;
239 if (!ds || !x86_pmu.bts)
242 - kfree((void *)(unsigned long)ds->bts_buffer_base);
243 + /* Clear the fixmap */
244 + cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
245 + ds_clear_cea(cea, BTS_BUFFER_SIZE);
246 ds->bts_buffer_base = 0;
247 + dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
248 + hwev->ds_bts_vaddr = NULL;
251 static int alloc_ds_buffer(int cpu)
253 - int node = cpu_to_node(cpu);
254 - struct debug_store *ds;
256 - ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
259 + struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
261 + memset(ds, 0, sizeof(*ds));
262 per_cpu(cpu_hw_events, cpu).ds = ds;
267 static void release_ds_buffer(int cpu)
269 - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
274 per_cpu(cpu_hw_events, cpu).ds = NULL;
278 void release_ds_buffers(void)