// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) IBM Corporation, 2014, 2017
 * Anton Blanchard, Rashmica Gupta.
 */
7 #define pr_fmt(fmt) "memtrace: " fmt
9 #include <linux/bitops.h>
10 #include <linux/string.h>
11 #include <linux/memblock.h>
12 #include <linux/init.h>
13 #include <linux/moduleparam.h>
15 #include <linux/debugfs.h>
16 #include <linux/slab.h>
17 #include <linux/memory.h>
18 #include <linux/memory_hotplug.h>
19 #include <linux/numa.h>
20 #include <asm/machdep.h>
21 #include <asm/debugfs.h>
23 /* This enables us to keep track of the memory removed from each node. */
/*
 * NOTE(review): the member list of this struct is not visible in this view.
 * Code elsewhere in the file accesses ->mem, ->start, ->size, ->nid, ->dir
 * and ->name on entries of this type; restore the definition from the
 * upstream file before building.
 */
24 struct memtrace_entry
{
33 static u64 memtrace_size
;
35 static struct memtrace_entry
*memtrace_array
;
36 static unsigned int memtrace_array_nr
;
39 static ssize_t
memtrace_read(struct file
*filp
, char __user
*ubuf
,
40 size_t count
, loff_t
*ppos
)
42 struct memtrace_entry
*ent
= filp
->private_data
;
44 return simple_read_from_buffer(ubuf
, count
, ppos
, ent
->mem
, ent
->size
);
/*
 * File operations used for the per-entry "trace" debugfs file: seeking goes
 * through default_llseek() and reads are served by memtrace_read().
 *
 * NOTE(review): the end of this initializer is not visible in this view.
 * memtrace_read() depends on filp->private_data being populated, so confirm
 * that an .open handler is installed here.
 */
47 static const struct file_operations memtrace_fops
= {
48 .llseek
= default_llseek
,
49 .read
= memtrace_read
,
/*
 * walk_memory_range() callback used by memtrace_offline_pages(): tests
 * whether the state of the memory block differs from MEM_ONLINE.
 * @arg is not referenced in the visible lines.
 *
 * NOTE(review): the return statements are not visible in this view; the
 * caller branches on a non-zero result of the walk.
 */
53 static int check_memblock_online(struct memory_block
*mem
, void *arg
)
55 if (mem
->state
!= MEM_ONLINE
)
/*
 * walk_memory_range() callback: @arg carries the requested state encoded as
 * an unsigned long (callers in this file pass MEM_GOING_OFFLINE, MEM_ONLINE
 * or MEM_OFFLINE cast to void *).
 *
 * NOTE(review): the statement that applies the state to @mem and the return
 * statement are not visible in this view.
 */
61 static int change_memblock_state(struct memory_block
*mem
, void *arg
)
63 unsigned long state
= (unsigned long)arg
;
/*
 * Try to offline @nr_pages pages starting at @start_pfn.
 *
 * Visible steps:
 *  1. walk [start_pfn, start_pfn + nr_pages - 1] with check_memblock_online();
 *  2. mark every block in the range MEM_GOING_OFFLINE;
 *  3. call offline_pages(); if it reports failure, put the blocks back to
 *     MEM_ONLINE;
 *  4. mark the blocks MEM_OFFLINE (presumably only reached on success).
 *
 * @nid is not referenced in the visible lines.
 *
 * NOTE(review): the return statements are not visible in this view; the
 * caller compares the result against true.
 */
70 /* called with device_hotplug_lock held */
71 static bool memtrace_offline_pages(u32 nid
, u64 start_pfn
, u64 nr_pages
)
73 u64 end_pfn
= start_pfn
+ nr_pages
- 1;
75 if (walk_memory_range(start_pfn
, end_pfn
, NULL
,
76 check_memblock_online
))
79 walk_memory_range(start_pfn
, end_pfn
, (void *)MEM_GOING_OFFLINE
,
80 change_memblock_state
);
82 if (offline_pages(start_pfn
, nr_pages
)) {
83 walk_memory_range(start_pfn
, end_pfn
, (void *)MEM_ONLINE
,
84 change_memblock_state
);
88 walk_memory_range(start_pfn
, end_pfn
, (void *)MEM_OFFLINE
,
89 change_memblock_state
);
/*
 * Take @size bytes of memory away from node @nid for use as trace memory.
 *
 * Candidate ranges are tried from the top of the node downwards in steps of
 * nr_pages (size >> PAGE_SHIFT), each aligned to nr_pages. The first range
 * that memtrace_offline_pages() accepts is removed with __remove_memory(),
 * one memory block (memory_block_size_bytes()) at a time, and its physical
 * start address (base_pfn << PAGE_SHIFT) is returned. The search runs with
 * lock_device_hotplug() held and drops the lock before returning.
 *
 * NOTE(review): the declaration of base_pfn and the return statements of
 * the empty-node and nothing-found paths are not visible in this view.
 * NOTE(review): end_pfn - nr_pages is unsigned arithmetic and would wrap if
 * the request exceeds node_end_pfn(); confirm that callers bound @size.
 */
95 static u64
memtrace_alloc_node(u32 nid
, u64 size
)
97 u64 start_pfn
, end_pfn
, nr_pages
, pfn
;
99 u64 bytes
= memory_block_size_bytes();
101 if (!node_spanned_pages(nid
))
104 start_pfn
= node_start_pfn(nid
);
105 end_pfn
= node_end_pfn(nid
);
106 nr_pages
= size
>> PAGE_SHIFT
;
108 /* Trace memory needs to be aligned to the size */
109 end_pfn
= round_down(end_pfn
- nr_pages
, nr_pages
);
111 lock_device_hotplug();
112 for (base_pfn
= end_pfn
; base_pfn
> start_pfn
; base_pfn
-= nr_pages
) {
113 if (memtrace_offline_pages(nid
, base_pfn
, nr_pages
) == true) {
115 * Remove memory in memory block size chunks so that
116 * iomem resources are always split to the same size and
117 * we never try to remove memory that spans two iomem
120 end_pfn
= base_pfn
+ nr_pages
;
121 for (pfn
= base_pfn
; pfn
< end_pfn
; pfn
+= bytes
>> PAGE_SHIFT
) {
122 __remove_memory(nid
, pfn
<< PAGE_SHIFT
, bytes
);
124 unlock_device_hotplug();
125 return base_pfn
<< PAGE_SHIFT
;
128 unlock_device_hotplug();
/*
 * Allocate memtrace_array with one zeroed slot per online node
 * (kcalloc(num_online_nodes(), ...)) and, for each online node, ask
 * memtrace_alloc_node() for @size bytes. The returned start address, @size
 * and the node id are stored in slot memtrace_array_nr of the table.
 *
 * NOTE(review): the local declarations, the failure test on the returned
 * address, the update of memtrace_array_nr and the return statements are
 * not visible in this view.
 */
133 static int memtrace_init_regions_runtime(u64 size
)
138 memtrace_array
= kcalloc(num_online_nodes(),
139 sizeof(struct memtrace_entry
), GFP_KERNEL
);
140 if (!memtrace_array
) {
141 pr_err("Failed to allocate memtrace_array\n");
145 for_each_online_node(nid
) {
146 m
= memtrace_alloc_node(nid
, size
);
149 * A node might not have any local memory, so warn but
153 pr_err("Failed to allocate trace memory on node %d\n", nid
);
157 pr_info("Allocated trace memory on node %d at 0x%016llx\n", nid
, m
);
159 memtrace_array
[memtrace_array_nr
].start
= m
;
160 memtrace_array
[memtrace_array_nr
].size
= size
;
161 memtrace_array
[memtrace_array_nr
].nid
= nid
;
/*
 * Top-level debugfs directory of this driver; set by memtrace_init() and
 * used as the parent of the per-node directories.
 */
static struct dentry *memtrace_debugfs_dir;
/*
 * For each of the first memtrace_array_nr entries of memtrace_array:
 * ioremap() the recorded range into ent->mem, create a debugfs directory
 * named after the node id (format "%08x") below memtrace_debugfs_dir, and
 * populate it with three 0400 files: "trace" (served by memtrace_fops with
 * the entry as its data pointer), "start" and "size".
 *
 * NOTE(review): the local declarations, the error checks around ioremap()
 * and debugfs_create_dir(), and the return statements are not visible in
 * this view.
 * NOTE(review): snprintf() is given a literal 16 as the buffer size; confirm
 * that it matches the size of ent->name.
 */
170 static int memtrace_init_debugfs(void)
175 for (i
= 0; i
< memtrace_array_nr
; i
++) {
177 struct memtrace_entry
*ent
= &memtrace_array
[i
];
179 ent
->mem
= ioremap(ent
->start
, ent
->size
);
180 /* Warn but continue on */
182 pr_err("Failed to map trace memory at 0x%llx\n",
188 snprintf(ent
->name
, 16, "%08x", ent
->nid
);
189 dir
= debugfs_create_dir(ent
->name
, memtrace_debugfs_dir
);
191 pr_err("Failed to create debugfs directory for node %d\n",
197 debugfs_create_file("trace", 0400, dir
, ent
, &memtrace_fops
);
198 debugfs_create_x64("start", 0400, dir
, &ent
->start
);
199 debugfs_create_x64("size", 0400, dir
, &ent
->size
);
205 static int online_mem_block(struct memory_block
*mem
, void *arg
)
207 return device_online(&mem
->dev
);
/*
 * Iterate through the chunks of memory we have removed from the kernel
 * and attempt to add them back to the kernel.
 */
/*
 * Walks memtrace_array from the last used slot down to slot 0. Entries whose
 * nid equals NUMA_NO_NODE are treated as already handled. For the others the
 * range is given back with add_memory(); when memhp_auto_online is not set
 * the blocks of the range are additionally onlined through
 * online_mem_block() with lock_device_hotplug() held. Afterwards the debugfs
 * directory of the entry is removed and its nid, start and size are
 * overwritten with NUMA_NO_NODE so that a later call can recognise the slot
 * as done. Finally memtrace_array is freed and the table globals are reset.
 *
 * NOTE(review): local declarations, the unmapping step announced by the
 * "Remove from io mappings" remark, the error paths and the return
 * statements are not visible in this view.
 */
214 static int memtrace_online(void)
217 struct memtrace_entry
*ent
;
219 for (i
= memtrace_array_nr
- 1; i
>= 0; i
--) {
220 ent
= &memtrace_array
[i
];
222 /* We have onlined this chunk previously */
223 if (ent
->nid
== NUMA_NO_NODE
)
226 /* Remove from io mappings */
232 if (add_memory(ent
->nid
, ent
->start
, ent
->size
)) {
233 pr_err("Failed to add trace memory to node %d\n",
240 * If kernel isn't compiled with the auto online option
241 * we need to online the memory ourselves.
243 if (!memhp_auto_online
) {
244 lock_device_hotplug();
245 walk_memory_range(PFN_DOWN(ent
->start
),
246 PFN_UP(ent
->start
+ ent
->size
- 1),
247 NULL
, online_mem_block
);
248 unlock_device_hotplug();
252 * Memory was added successfully so clean up references to it
253 * so on reentry we can tell that this chunk was added.
255 debugfs_remove_recursive(ent
->dir
);
256 pr_info("Added trace memory back to node %d\n", ent
->nid
);
257 ent
->size
= ent
->start
= ent
->nid
= NUMA_NO_NODE
;
262 /* If all chunks of memory were added successfully, reset globals */
263 kfree(memtrace_array
);
264 memtrace_array
= NULL
;
266 memtrace_array_nr
= 0;
/*
 * Setter behind memtrace_init_fops (the debugfs "enable" file).
 *
 * Visible steps: log an error when @val is not a multiple of
 * memory_block_size_bytes(); call memtrace_online() to give back memory that
 * was removed earlier (possibly only under a condition that is not visible
 * here); then remove new memory with memtrace_init_regions_runtime(@val) and
 * expose it with memtrace_init_debugfs().
 *
 * @data is not referenced in the visible lines.
 *
 * NOTE(review): the local declarations, the handling of a zero @val, the
 * bodies of the error branches, any update of memtrace_size and the return
 * statements are not visible in this view.
 */
270 static int memtrace_enable_set(void *data
, u64 val
)
275 * Don't attempt to do anything if size isn't aligned to a memory
276 * block or equal to zero.
278 bytes
= memory_block_size_bytes();
279 if (val
& (bytes
- 1)) {
280 pr_err("Value must be aligned with 0x%llx\n", bytes
);
284 /* Re-add/online previously removed/offlined memory */
286 if (memtrace_online())
293 /* Offline and remove memory */
294 if (memtrace_init_regions_runtime(val
))
297 if (memtrace_init_debugfs())
/*
 * Getter behind memtrace_init_fops (the debugfs "enable" file): stores the
 * current value of memtrace_size in *@val.
 * @data is not referenced in the visible lines.
 *
 * NOTE(review): the return statement is not visible in this view.
 */
305 static int memtrace_enable_get(void *data
, u64
*val
)
307 *val
= memtrace_size
;
311 DEFINE_SIMPLE_ATTRIBUTE(memtrace_init_fops
, memtrace_enable_get
,
312 memtrace_enable_set
, "0x%016llx\n");
/*
 * Creates the "memtrace" directory below powerpc_debugfs_root and, inside
 * it, an "enable" file (mode 0600, no data pointer) backed by
 * memtrace_init_fops.
 *
 * NOTE(review): the return statements are not visible in this view.
 * NOTE(review): failure of debugfs_create_dir() is tested with a NULL check;
 * confirm that this matches how the function reports errors on the target
 * kernel version.
 */
314 static int memtrace_init(void)
316 memtrace_debugfs_dir
= debugfs_create_dir("memtrace",
317 powerpc_debugfs_root
);
318 if (!memtrace_debugfs_dir
)
321 debugfs_create_file("enable", 0600, memtrace_debugfs_dir
,
322 NULL
, &memtrace_init_fops
);
/* Register memtrace_init() as a device initcall for the powernv machine. */
machine_device_initcall(powernv, memtrace_init);