// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) IBM Corporation, 2014, 2017
 * Anton Blanchard, Rashmica Gupta.
 */

#define pr_fmt(fmt) "memtrace: " fmt

#include <linux/bitops.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/numa.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>
/* This enables us to keep track of the memory removed from each node. */
struct memtrace_entry {
	void *mem;		/* ioremap()ed kernel view of the chunk */
	u64 start;		/* physical start address of the chunk */
	u64 size;		/* chunk size in bytes */
	u32 nid;		/* NUMA node the memory was allocated from */
	struct dentry *dir;	/* per-node debugfs directory */
	char name[16];		/* debugfs dir name, "%08x" of nid */
};
32 | ||
/* Serializes concurrent writes to the debugfs "enable" file. */
static DEFINE_MUTEX(memtrace_mutex);
/* Per-node trace-memory size currently allocated; 0 when disabled. */
static u64 memtrace_size;

static struct memtrace_entry *memtrace_array;	/* one entry per node with memory */
static unsigned int memtrace_array_nr;		/* number of populated entries */
38 | ||
39 | ||
40 | static ssize_t memtrace_read(struct file *filp, char __user *ubuf, | |
41 | size_t count, loff_t *ppos) | |
42 | { | |
43 | struct memtrace_entry *ent = filp->private_data; | |
44 | ||
45 | return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); | |
46 | } | |
47 | ||
/* File operations for the per-node debugfs "trace" file. */
static const struct file_operations memtrace_fops = {
	.llseek = default_llseek,
	.read	= memtrace_read,
	.open	= simple_open,	/* stashes the memtrace_entry in private_data */
};
53 | ||
c74cf7a3 DH |
54 | static void memtrace_clear_range(unsigned long start_pfn, |
55 | unsigned long nr_pages) | |
56 | { | |
57 | unsigned long pfn; | |
58 | ||
0bd4b96d | 59 | /* As HIGHMEM does not apply, use clear_page() directly. */ |
c74cf7a3 DH |
60 | for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) { |
61 | if (IS_ALIGNED(pfn, PAGES_PER_SECTION)) | |
62 | cond_resched(); | |
63 | clear_page(__va(PFN_PHYS(pfn))); | |
64 | } | |
65 | } | |
66 | ||
/*
 * Allocate a naturally aligned, zeroed chunk of @size bytes on node @nid,
 * mark its pages offline and remove them from the linear mapping.
 *
 * Returns the physical start address of the chunk, or 0 on failure.
 */
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
	const unsigned long nr_pages = PHYS_PFN(size);
	unsigned long pfn, start_pfn;
	struct page *page;

	/*
	 * Trace memory needs to be aligned to the size, which is guaranteed
	 * by alloc_contig_pages().
	 */
	page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
				  __GFP_NOWARN, nid, NULL);
	if (!page)
		return 0;
	start_pfn = page_to_pfn(page);

	/*
	 * Clear the range while we still have a linear mapping.
	 *
	 * TODO: use __GFP_ZERO with alloc_contig_pages() once supported.
	 */
	memtrace_clear_range(start_pfn, nr_pages);

	/*
	 * Set pages PageOffline(), to indicate that nobody (e.g., hibernation,
	 * dumping, ...) should be touching these pages.
	 */
	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
		__SetPageOffline(pfn_to_page(pfn));

	/* From here on the chunk is only reachable via ioremap(). */
	arch_remove_linear_mapping(PFN_PHYS(start_pfn), size);

	return PFN_PHYS(start_pfn);
}
101 | ||
102 | static int memtrace_init_regions_runtime(u64 size) | |
103 | { | |
104 | u32 nid; | |
105 | u64 m; | |
106 | ||
107 | memtrace_array = kcalloc(num_online_nodes(), | |
108 | sizeof(struct memtrace_entry), GFP_KERNEL); | |
109 | if (!memtrace_array) { | |
110 | pr_err("Failed to allocate memtrace_array\n"); | |
111 | return -EINVAL; | |
112 | } | |
113 | ||
114 | for_each_online_node(nid) { | |
115 | m = memtrace_alloc_node(nid, size); | |
116 | ||
117 | /* | |
118 | * A node might not have any local memory, so warn but | |
119 | * continue on. | |
120 | */ | |
121 | if (!m) { | |
122 | pr_err("Failed to allocate trace memory on node %d\n", nid); | |
123 | continue; | |
124 | } | |
125 | ||
126 | pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); | |
127 | ||
128 | memtrace_array[memtrace_array_nr].start = m; | |
129 | memtrace_array[memtrace_array_nr].size = size; | |
130 | memtrace_array[memtrace_array_nr].nid = nid; | |
131 | memtrace_array_nr++; | |
132 | } | |
133 | ||
134 | return 0; | |
135 | } | |
136 | ||
137 | static struct dentry *memtrace_debugfs_dir; | |
138 | ||
139 | static int memtrace_init_debugfs(void) | |
140 | { | |
141 | int ret = 0; | |
142 | int i; | |
143 | ||
144 | for (i = 0; i < memtrace_array_nr; i++) { | |
145 | struct dentry *dir; | |
146 | struct memtrace_entry *ent = &memtrace_array[i]; | |
147 | ||
148 | ent->mem = ioremap(ent->start, ent->size); | |
149 | /* Warn but continue on */ | |
150 | if (!ent->mem) { | |
151 | pr_err("Failed to map trace memory at 0x%llx\n", | |
152 | ent->start); | |
153 | ret = -1; | |
154 | continue; | |
155 | } | |
156 | ||
157 | snprintf(ent->name, 16, "%08x", ent->nid); | |
158 | dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); | |
9d5171a8 RG |
159 | |
160 | ent->dir = dir; | |
161 | debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); | |
162 | debugfs_create_x64("start", 0400, dir, &ent->start); | |
163 | debugfs_create_x64("size", 0400, dir, &ent->size); | |
164 | } | |
165 | ||
166 | return ret; | |
167 | } | |
168 | ||
/*
 * Return one chunk to the kernel: re-create the linear mapping, clear the
 * PageOffline() markers and free the contiguous range.
 *
 * Returns 0 on success or the error from arch_create_linear_mapping().
 */
static int memtrace_free(int nid, u64 start, u64 size)
{
	struct mhp_params params = { .pgprot = PAGE_KERNEL };
	const unsigned long nr_pages = PHYS_PFN(size);
	const unsigned long start_pfn = PHYS_PFN(start);
	unsigned long pfn;
	int ret;

	/* Mapping must exist again before anyone can touch the pages. */
	ret = arch_create_linear_mapping(nid, start, size, &params);
	if (ret)
		return ret;

	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
		__ClearPageOffline(pfn_to_page(pfn));

	free_contig_range(start_pfn, nr_pages);
	return 0;
}
187 | ||
188 | /* | |
0bd4b96d DH |
189 | * Iterate through the chunks of memory we allocated and attempt to expose |
190 | * them back to the kernel. | |
d3da701d | 191 | */ |
0bd4b96d | 192 | static int memtrace_free_regions(void) |
d3da701d RG |
193 | { |
194 | int i, ret = 0; | |
195 | struct memtrace_entry *ent; | |
196 | ||
197 | for (i = memtrace_array_nr - 1; i >= 0; i--) { | |
198 | ent = &memtrace_array[i]; | |
199 | ||
0bd4b96d | 200 | /* We have freed this chunk previously */ |
98fa15f3 | 201 | if (ent->nid == NUMA_NO_NODE) |
d3da701d RG |
202 | continue; |
203 | ||
204 | /* Remove from io mappings */ | |
205 | if (ent->mem) { | |
206 | iounmap(ent->mem); | |
207 | ent->mem = 0; | |
208 | } | |
209 | ||
0bd4b96d DH |
210 | if (memtrace_free(ent->nid, ent->start, ent->size)) { |
211 | pr_err("Failed to free trace memory on node %d\n", | |
d3da701d RG |
212 | ent->nid); |
213 | ret += 1; | |
214 | continue; | |
215 | } | |
216 | ||
d3da701d | 217 | /* |
0bd4b96d DH |
218 | * Memory was freed successfully so clean up references to it |
219 | * so on reentry we can tell that this chunk was freed. | |
d3da701d RG |
220 | */ |
221 | debugfs_remove_recursive(ent->dir); | |
0bd4b96d | 222 | pr_info("Freed trace memory back on node %d\n", ent->nid); |
98fa15f3 | 223 | ent->size = ent->start = ent->nid = NUMA_NO_NODE; |
d3da701d RG |
224 | } |
225 | if (ret) | |
226 | return ret; | |
227 | ||
0bd4b96d | 228 | /* If all chunks of memory were freed successfully, reset globals */ |
d3da701d RG |
229 | kfree(memtrace_array); |
230 | memtrace_array = NULL; | |
231 | memtrace_size = 0; | |
232 | memtrace_array_nr = 0; | |
233 | return 0; | |
234 | } | |
235 | ||
/*
 * debugfs "enable" write handler: @val is the per-node size to allocate.
 *
 * Any previous allocation is freed first; writing 0 just disables tracing.
 * Returns 0 on success, -EINVAL for a misaligned size, -EAGAIN if freeing
 * or (re)allocating failed.
 */
static int memtrace_enable_set(void *data, u64 val)
{
	int rc = -EAGAIN;
	u64 bytes;

	/*
	 * Don't attempt to do anything if size isn't aligned to a memory
	 * block or equal to zero.
	 */
	bytes = memory_block_size_bytes();
	if (val & (bytes - 1)) {
		pr_err("Value must be aligned with 0x%llx\n", bytes);
		return -EINVAL;
	}

	/* Serialize against concurrent enable/disable requests. */
	mutex_lock(&memtrace_mutex);

	/* Free all previously allocated memory. */
	if (memtrace_size && memtrace_free_regions())
		goto out_unlock;

	if (!val) {
		/* Size 0 means "disable"; nothing more to do. */
		rc = 0;
		goto out_unlock;
	}

	/* Allocate memory. */
	if (memtrace_init_regions_runtime(val))
		goto out_unlock;

	if (memtrace_init_debugfs())
		goto out_unlock;

	memtrace_size = val;
	rc = 0;
out_unlock:
	mutex_unlock(&memtrace_mutex);
	return rc;
}
275 | ||
/* debugfs "enable" read handler: report the currently allocated size. */
static int memtrace_enable_get(void *data, u64 *val)
{
	*val = memtrace_size;
	return 0;
}
281 | ||
/* "enable" file ops: get/set memtrace_size, formatted as 0x%016llx. */
DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
			memtrace_enable_set, "0x%016llx\n");
284 | ||
/*
 * Create the debugfs layout at boot:
 *   <powerpc debugfs root>/memtrace/enable - write a size to (re)allocate.
 */
static int memtrace_init(void)
{
	memtrace_debugfs_dir = debugfs_create_dir("memtrace",
						  powerpc_debugfs_root);

	debugfs_create_file("enable", 0600, memtrace_debugfs_dir,
			    NULL, &memtrace_init_fops);

	return 0;
}
machine_device_initcall(powernv, memtrace_init);