]>
Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
9d5171a8 RG |
2 | /* |
3 | * Copyright (C) IBM Corporation, 2014, 2017 | |
4 | * Anton Blanchard, Rashmica Gupta. | |
9d5171a8 RG |
5 | */ |
6 | ||
7 | #define pr_fmt(fmt) "memtrace: " fmt | |
8 | ||
#include <linux/bitops.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mutex.h>
#include <linux/numa.h>
#include <asm/machdep.h>
#include <asm/debugfs.h>
22 | ||
/* This enables us to keep track of the memory removed from each node. */
struct memtrace_entry {
	void *mem;		/* ioremap() mapping of the chunk; NULL when unmapped */
	u64 start;		/* physical start address of the removed chunk */
	u64 size;		/* size of the removed chunk in bytes */
	u32 nid;		/* node the chunk came from; NUMA_NO_NODE once re-added */
	struct dentry *dir;	/* per-node debugfs directory for this chunk */
	char name[16];		/* debugfs directory name: nid formatted as "%08x" */
};
32 | ||
/*
 * Size requested through the "enable" file; set after a successful enable
 * and reset to 0 once every chunk has been added back.
 */
static u64 memtrace_size;

/* One entry per chunk removed; allocated with room for every online node. */
static struct memtrace_entry *memtrace_array;
/* Number of entries of memtrace_array that have been filled in. */
static unsigned int memtrace_array_nr;
37 | ||
38 | ||
39 | static ssize_t memtrace_read(struct file *filp, char __user *ubuf, | |
40 | size_t count, loff_t *ppos) | |
41 | { | |
42 | struct memtrace_entry *ent = filp->private_data; | |
43 | ||
44 | return simple_read_from_buffer(ubuf, count, ppos, ent->mem, ent->size); | |
45 | } | |
46 | ||
9d5171a8 RG |
47 | static const struct file_operations memtrace_fops = { |
48 | .llseek = default_llseek, | |
49 | .read = memtrace_read, | |
9d5171a8 RG |
50 | .open = simple_open, |
51 | }; | |
52 | ||
9d5171a8 RG |
53 | static int check_memblock_online(struct memory_block *mem, void *arg) |
54 | { | |
55 | if (mem->state != MEM_ONLINE) | |
56 | return -1; | |
57 | ||
58 | return 0; | |
59 | } | |
60 | ||
61 | static int change_memblock_state(struct memory_block *mem, void *arg) | |
62 | { | |
63 | unsigned long state = (unsigned long)arg; | |
64 | ||
65 | mem->state = state; | |
66 | ||
67 | return 0; | |
68 | } | |
69 | ||
56668487 | 70 | /* called with device_hotplug_lock held */ |
9d5171a8 RG |
71 | static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) |
72 | { | |
73 | u64 end_pfn = start_pfn + nr_pages - 1; | |
74 | ||
75 | if (walk_memory_range(start_pfn, end_pfn, NULL, | |
76 | check_memblock_online)) | |
77 | return false; | |
78 | ||
79 | walk_memory_range(start_pfn, end_pfn, (void *)MEM_GOING_OFFLINE, | |
80 | change_memblock_state); | |
81 | ||
82 | if (offline_pages(start_pfn, nr_pages)) { | |
83 | walk_memory_range(start_pfn, end_pfn, (void *)MEM_ONLINE, | |
84 | change_memblock_state); | |
85 | return false; | |
86 | } | |
87 | ||
88 | walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE, | |
89 | change_memblock_state); | |
90 | ||
9d5171a8 RG |
91 | |
92 | return true; | |
93 | } | |
94 | ||
95 | static u64 memtrace_alloc_node(u32 nid, u64 size) | |
96 | { | |
3f7daf3d | 97 | u64 start_pfn, end_pfn, nr_pages, pfn; |
9d5171a8 | 98 | u64 base_pfn; |
3f7daf3d | 99 | u64 bytes = memory_block_size_bytes(); |
9d5171a8 | 100 | |
8ccb442d | 101 | if (!node_spanned_pages(nid)) |
9d5171a8 RG |
102 | return 0; |
103 | ||
104 | start_pfn = node_start_pfn(nid); | |
105 | end_pfn = node_end_pfn(nid); | |
106 | nr_pages = size >> PAGE_SHIFT; | |
107 | ||
108 | /* Trace memory needs to be aligned to the size */ | |
109 | end_pfn = round_down(end_pfn - nr_pages, nr_pages); | |
110 | ||
56668487 | 111 | lock_device_hotplug(); |
9d5171a8 | 112 | for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { |
3f7daf3d RG |
113 | if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { |
114 | /* | |
115 | * Remove memory in memory block size chunks so that | |
116 | * iomem resources are always split to the same size and | |
117 | * we never try to remove memory that spans two iomem | |
118 | * resources. | |
119 | */ | |
3f7daf3d RG |
120 | end_pfn = base_pfn + nr_pages; |
121 | for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { | |
d15e5926 | 122 | __remove_memory(nid, pfn << PAGE_SHIFT, bytes); |
3f7daf3d RG |
123 | } |
124 | unlock_device_hotplug(); | |
9d5171a8 | 125 | return base_pfn << PAGE_SHIFT; |
3f7daf3d | 126 | } |
9d5171a8 | 127 | } |
56668487 | 128 | unlock_device_hotplug(); |
9d5171a8 RG |
129 | |
130 | return 0; | |
131 | } | |
132 | ||
133 | static int memtrace_init_regions_runtime(u64 size) | |
134 | { | |
135 | u32 nid; | |
136 | u64 m; | |
137 | ||
138 | memtrace_array = kcalloc(num_online_nodes(), | |
139 | sizeof(struct memtrace_entry), GFP_KERNEL); | |
140 | if (!memtrace_array) { | |
141 | pr_err("Failed to allocate memtrace_array\n"); | |
142 | return -EINVAL; | |
143 | } | |
144 | ||
145 | for_each_online_node(nid) { | |
146 | m = memtrace_alloc_node(nid, size); | |
147 | ||
148 | /* | |
149 | * A node might not have any local memory, so warn but | |
150 | * continue on. | |
151 | */ | |
152 | if (!m) { | |
153 | pr_err("Failed to allocate trace memory on node %d\n", nid); | |
154 | continue; | |
155 | } | |
156 | ||
157 | pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid, m); | |
158 | ||
159 | memtrace_array[memtrace_array_nr].start = m; | |
160 | memtrace_array[memtrace_array_nr].size = size; | |
161 | memtrace_array[memtrace_array_nr].nid = nid; | |
162 | memtrace_array_nr++; | |
163 | } | |
164 | ||
165 | return 0; | |
166 | } | |
167 | ||
168 | static struct dentry *memtrace_debugfs_dir; | |
169 | ||
170 | static int memtrace_init_debugfs(void) | |
171 | { | |
172 | int ret = 0; | |
173 | int i; | |
174 | ||
175 | for (i = 0; i < memtrace_array_nr; i++) { | |
176 | struct dentry *dir; | |
177 | struct memtrace_entry *ent = &memtrace_array[i]; | |
178 | ||
179 | ent->mem = ioremap(ent->start, ent->size); | |
180 | /* Warn but continue on */ | |
181 | if (!ent->mem) { | |
182 | pr_err("Failed to map trace memory at 0x%llx\n", | |
183 | ent->start); | |
184 | ret = -1; | |
185 | continue; | |
186 | } | |
187 | ||
188 | snprintf(ent->name, 16, "%08x", ent->nid); | |
189 | dir = debugfs_create_dir(ent->name, memtrace_debugfs_dir); | |
d3da701d RG |
190 | if (!dir) { |
191 | pr_err("Failed to create debugfs directory for node %d\n", | |
192 | ent->nid); | |
9d5171a8 | 193 | return -1; |
d3da701d | 194 | } |
9d5171a8 RG |
195 | |
196 | ent->dir = dir; | |
197 | debugfs_create_file("trace", 0400, dir, ent, &memtrace_fops); | |
198 | debugfs_create_x64("start", 0400, dir, &ent->start); | |
199 | debugfs_create_x64("size", 0400, dir, &ent->size); | |
200 | } | |
201 | ||
202 | return ret; | |
203 | } | |
204 | ||
d3da701d RG |
205 | static int online_mem_block(struct memory_block *mem, void *arg) |
206 | { | |
207 | return device_online(&mem->dev); | |
208 | } | |
209 | ||
210 | /* | |
211 | * Iterate through the chunks of memory we have removed from the kernel | |
212 | * and attempt to add them back to the kernel. | |
213 | */ | |
214 | static int memtrace_online(void) | |
215 | { | |
216 | int i, ret = 0; | |
217 | struct memtrace_entry *ent; | |
218 | ||
219 | for (i = memtrace_array_nr - 1; i >= 0; i--) { | |
220 | ent = &memtrace_array[i]; | |
221 | ||
222 | /* We have onlined this chunk previously */ | |
98fa15f3 | 223 | if (ent->nid == NUMA_NO_NODE) |
d3da701d RG |
224 | continue; |
225 | ||
226 | /* Remove from io mappings */ | |
227 | if (ent->mem) { | |
228 | iounmap(ent->mem); | |
229 | ent->mem = 0; | |
230 | } | |
231 | ||
232 | if (add_memory(ent->nid, ent->start, ent->size)) { | |
233 | pr_err("Failed to add trace memory to node %d\n", | |
234 | ent->nid); | |
235 | ret += 1; | |
236 | continue; | |
237 | } | |
238 | ||
239 | /* | |
240 | * If kernel isn't compiled with the auto online option | |
241 | * we need to online the memory ourselves. | |
242 | */ | |
243 | if (!memhp_auto_online) { | |
cec16805 | 244 | lock_device_hotplug(); |
d3da701d RG |
245 | walk_memory_range(PFN_DOWN(ent->start), |
246 | PFN_UP(ent->start + ent->size - 1), | |
247 | NULL, online_mem_block); | |
cec16805 | 248 | unlock_device_hotplug(); |
d3da701d RG |
249 | } |
250 | ||
251 | /* | |
252 | * Memory was added successfully so clean up references to it | |
253 | * so on reentry we can tell that this chunk was added. | |
254 | */ | |
255 | debugfs_remove_recursive(ent->dir); | |
256 | pr_info("Added trace memory back to node %d\n", ent->nid); | |
98fa15f3 | 257 | ent->size = ent->start = ent->nid = NUMA_NO_NODE; |
d3da701d RG |
258 | } |
259 | if (ret) | |
260 | return ret; | |
261 | ||
262 | /* If all chunks of memory were added successfully, reset globals */ | |
263 | kfree(memtrace_array); | |
264 | memtrace_array = NULL; | |
265 | memtrace_size = 0; | |
266 | memtrace_array_nr = 0; | |
267 | return 0; | |
268 | } | |
269 | ||
9d5171a8 RG |
270 | static int memtrace_enable_set(void *data, u64 val) |
271 | { | |
d3da701d RG |
272 | u64 bytes; |
273 | ||
274 | /* | |
275 | * Don't attempt to do anything if size isn't aligned to a memory | |
276 | * block or equal to zero. | |
277 | */ | |
278 | bytes = memory_block_size_bytes(); | |
279 | if (val & (bytes - 1)) { | |
280 | pr_err("Value must be aligned with 0x%llx\n", bytes); | |
9d5171a8 | 281 | return -EINVAL; |
d3da701d | 282 | } |
9d5171a8 | 283 | |
d3da701d RG |
284 | /* Re-add/online previously removed/offlined memory */ |
285 | if (memtrace_size) { | |
286 | if (memtrace_online()) | |
287 | return -EAGAIN; | |
288 | } | |
9d5171a8 | 289 | |
d3da701d RG |
290 | if (!val) |
291 | return 0; | |
9d5171a8 | 292 | |
d3da701d | 293 | /* Offline and remove memory */ |
9d5171a8 RG |
294 | if (memtrace_init_regions_runtime(val)) |
295 | return -EINVAL; | |
296 | ||
297 | if (memtrace_init_debugfs()) | |
298 | return -EINVAL; | |
299 | ||
300 | memtrace_size = val; | |
301 | ||
302 | return 0; | |
303 | } | |
304 | ||
305 | static int memtrace_enable_get(void *data, u64 *val) | |
306 | { | |
307 | *val = memtrace_size; | |
308 | return 0; | |
309 | } | |
310 | ||
/*
 * File operations for the "enable" file: the value is read through
 * memtrace_enable_get(), written through memtrace_enable_set(), and
 * formatted as a zero-padded 64-bit hex number.
 */
DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops, memtrace_enable_get,
			memtrace_enable_set, "0x%016llx\n");
313 | ||
314 | static int memtrace_init(void) | |
315 | { | |
316 | memtrace_debugfs_dir = debugfs_create_dir("memtrace", | |
317 | powerpc_debugfs_root); | |
318 | if (!memtrace_debugfs_dir) | |
319 | return -1; | |
320 | ||
321 | debugfs_create_file("enable", 0600, memtrace_debugfs_dir, | |
322 | NULL, &memtrace_init_fops); | |
323 | ||
324 | return 0; | |
325 | } | |
326 | machine_device_initcall(powernv, memtrace_init); |