]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - arch/x86_64/mm/numa.c
[PATCH] x86_64: Fix up coding style in numa.c
[mirror_ubuntu-bionic-kernel.git] / arch / x86_64 / mm / numa.c
CommitLineData
1da177e4
LT
1/*
2 * Generic VM initialization for x86-64 NUMA setups.
3 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
4 */
5#include <linux/kernel.h>
6#include <linux/mm.h>
7#include <linux/string.h>
8#include <linux/init.h>
9#include <linux/bootmem.h>
10#include <linux/mmzone.h>
11#include <linux/ctype.h>
12#include <linux/module.h>
13#include <linux/nodemask.h>
14
15#include <asm/e820.h>
16#include <asm/proto.h>
17#include <asm/dma.h>
18#include <asm/numa.h>
19#include <asm/acpi.h>
20
21#ifndef Dprintk
22#define Dprintk(x...)
23#endif
24
6c231b7b 25struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
1da177e4
LT
26bootmem_data_t plat_node_bdata[MAX_NUMNODES];
27
28int memnode_shift;
29u8 memnodemap[NODEMAPSIZE];
30
3f098c26
AK
31unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
32 [0 ... NR_CPUS-1] = NUMA_NO_NODE
0b07e984 33};
3f098c26
AK
34unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
35 [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
36};
37cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
1da177e4
LT
38
39int numa_off __initdata;
40
529a3404
ED
41
42/*
43 * Given a shift value, try to populate memnodemap[]
44 * Returns :
45 * 1 if OK
46 * 0 if memnodmap[] too small (of shift too small)
47 * -1 if node overlap or lost ram (shift too big)
48 */
d18ff470
AK
49static int __init
50populate_memnodemap(const struct node *nodes, int numnodes, int shift)
1da177e4
LT
51{
52 int i;
529a3404
ED
53 int res = -1;
54 unsigned long addr, end;
b684664f 55
8309cf66
ED
56 if (shift >= 64)
57 return -1;
529a3404 58 memset(memnodemap, 0xff, sizeof(memnodemap));
b684664f 59 for (i = 0; i < numnodes; i++) {
529a3404
ED
60 addr = nodes[i].start;
61 end = nodes[i].end;
62 if (addr >= end)
b684664f 63 continue;
529a3404
ED
64 if ((end >> shift) >= NODEMAPSIZE)
65 return 0;
66 do {
67 if (memnodemap[addr >> shift] != 0xff)
b684664f 68 return -1;
b684664f 69 memnodemap[addr >> shift] = i;
8309cf66 70 addr += (1UL << shift);
529a3404
ED
71 } while (addr < end);
72 res = 1;
1da177e4 73 }
529a3404
ED
74 return res;
75}
76
77int __init compute_hash_shift(struct node *nodes, int numnodes)
78{
79 int shift = 20;
80
81 while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
82 shift++;
83
84 printk(KERN_DEBUG "Using %d for the hash shift.\n",
85 shift);
86
87 if (populate_memnodemap(nodes, numnodes, shift) != 1) {
88 printk(KERN_INFO
89 "Your memory is not aligned you need to rebuild your kernel "
90 "with a bigger NODEMAPSIZE shift=%d\n",
91 shift);
92 return -1;
93 }
b684664f 94 return shift;
1da177e4
LT
95}
96
bbfceef4
MT
97#ifdef CONFIG_SPARSEMEM
98int early_pfn_to_nid(unsigned long pfn)
99{
100 return phys_to_nid(pfn << PAGE_SHIFT);
101}
102#endif
103
1da177e4
LT
104/* Initialize bootmem allocator for a node */
105void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
106{
107 unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
108 unsigned long nodedata_phys;
109 const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
110
111 start = round_up(start, ZONE_ALIGN);
112
113 printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
114
115 start_pfn = start >> PAGE_SHIFT;
116 end_pfn = end >> PAGE_SHIFT;
117
118 nodedata_phys = find_e820_area(start, end, pgdat_size);
119 if (nodedata_phys == -1L)
120 panic("Cannot find memory pgdat in node %d\n", nodeid);
121
122 Dprintk("nodedata_phys %lx\n", nodedata_phys);
123
124 node_data[nodeid] = phys_to_virt(nodedata_phys);
125 memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
126 NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
127 NODE_DATA(nodeid)->node_start_pfn = start_pfn;
128 NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
129
130 /* Find a place for the bootmem map */
131 bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
132 bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
133 bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT);
134 if (bootmap_start == -1L)
135 panic("Not enough continuous space for bootmap on node %d", nodeid);
136 Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
137
138 bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
139 bootmap_start >> PAGE_SHIFT,
140 start_pfn, end_pfn);
141
142 e820_bootmem_free(NODE_DATA(nodeid), start, end);
143
144 reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
145 reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
146 node_set_online(nodeid);
147}
148
149/* Initialize final allocator for a zone */
150void __init setup_node_zones(int nodeid)
151{
152 unsigned long start_pfn, end_pfn;
153 unsigned long zones[MAX_NR_ZONES];
485761bd 154 unsigned long holes[MAX_NR_ZONES];
1da177e4 155
a2f1b424
AK
156 start_pfn = node_start_pfn(nodeid);
157 end_pfn = node_end_pfn(nodeid);
1da177e4 158
a2f1b424
AK
159 Dprintk(KERN_INFO "setting up node %d %lx-%lx\n",
160 nodeid, start_pfn, end_pfn);
1da177e4 161
a2f1b424 162 size_zones(zones, holes, start_pfn, end_pfn);
1da177e4 163 free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
485761bd 164 start_pfn, holes);
1da177e4
LT
165}
166
167void __init numa_init_array(void)
168{
169 int rr, i;
170 /* There are unfortunately some poorly designed mainboards around
171 that only connect memory to a single CPU. This breaks the 1:1 cpu->node
172 mapping. To avoid this fill in the mapping for all possible
173 CPUs, as the number of CPUs is not known yet.
174 We round robin the existing nodes. */
85cc5135 175 rr = first_node(node_online_map);
1da177e4
LT
176 for (i = 0; i < NR_CPUS; i++) {
177 if (cpu_to_node[i] != NUMA_NO_NODE)
178 continue;
69d81fcd 179 numa_set_node(i, rr);
1da177e4
LT
180 rr = next_node(rr, node_online_map);
181 if (rr == MAX_NUMNODES)
182 rr = first_node(node_online_map);
1da177e4
LT
183 }
184
1da177e4
LT
185}
186
187#ifdef CONFIG_NUMA_EMU
188int numa_fake __initdata = 0;
189
190/* Numa emulation */
191static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
192{
193 int i;
194 struct node nodes[MAX_NUMNODES];
195 unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
196
197 /* Kludge needed for the hash function */
198 if (hweight64(sz) > 1) {
199 unsigned long x = 1;
200 while ((x << 1) < sz)
201 x <<= 1;
202 if (x < sz/2)
203 printk("Numa emulation unbalanced. Complain to maintainer\n");
204 sz = x;
205 }
206
207 memset(&nodes,0,sizeof(nodes));
208 for (i = 0; i < numa_fake; i++) {
209 nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
210 if (i == numa_fake-1)
211 sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
212 nodes[i].end = nodes[i].start + sz;
1da177e4
LT
213 printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
214 i,
215 nodes[i].start, nodes[i].end,
216 (nodes[i].end - nodes[i].start) >> 20);
217 node_set_online(i);
218 }
219 memnode_shift = compute_hash_shift(nodes, numa_fake);
220 if (memnode_shift < 0) {
221 memnode_shift = 0;
222 printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
223 return -1;
224 }
225 for_each_online_node(i)
226 setup_node_bootmem(i, nodes[i].start, nodes[i].end);
227 numa_init_array();
228 return 0;
229}
230#endif
231
232void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
233{
234 int i;
235
236#ifdef CONFIG_NUMA_EMU
237 if (numa_fake && !numa_emulation(start_pfn, end_pfn))
238 return;
239#endif
240
241#ifdef CONFIG_ACPI_NUMA
242 if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
243 end_pfn << PAGE_SHIFT))
244 return;
245#endif
246
247#ifdef CONFIG_K8_NUMA
248 if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
249 return;
250#endif
251 printk(KERN_INFO "%s\n",
252 numa_off ? "NUMA turned off" : "No NUMA configuration found");
253
254 printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
255 start_pfn << PAGE_SHIFT,
256 end_pfn << PAGE_SHIFT);
257 /* setup dummy node covering all memory */
258 memnode_shift = 63;
259 memnodemap[0] = 0;
260 nodes_clear(node_online_map);
261 node_set_online(0);
262 for (i = 0; i < NR_CPUS; i++)
69d81fcd 263 numa_set_node(i, 0);
1da177e4
LT
264 node_to_cpumask[0] = cpumask_of_cpu(0);
265 setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
266}
267
e6982c67 268__cpuinit void numa_add_cpu(int cpu)
1da177e4 269{
e6a045a5 270 set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
1da177e4
LT
271}
272
69d81fcd
AK
273void __cpuinit numa_set_node(int cpu, int node)
274{
275 cpu_pda[cpu].nodenumber = node;
276 cpu_to_node[cpu] = node;
277}
278
1da177e4
LT
279unsigned long __init numa_free_all_bootmem(void)
280{
281 int i;
282 unsigned long pages = 0;
283 for_each_online_node(i) {
284 pages += free_all_bootmem_node(NODE_DATA(i));
285 }
286 return pages;
287}
288
d3ee871e
BP
289#ifdef CONFIG_SPARSEMEM
290static void __init arch_sparse_init(void)
291{
292 int i;
293
294 for_each_online_node(i)
295 memory_present(i, node_start_pfn(i), node_end_pfn(i));
296
297 sparse_init();
298}
299#else
300#define arch_sparse_init() do {} while (0)
301#endif
302
1da177e4
LT
303void __init paging_init(void)
304{
305 int i;
d3ee871e
BP
306
307 arch_sparse_init();
308
1da177e4
LT
309 for_each_online_node(i) {
310 setup_node_zones(i);
311 }
312}
313
314/* [numa=off] */
315__init int numa_setup(char *opt)
316{
317 if (!strncmp(opt,"off",3))
318 numa_off = 1;
319#ifdef CONFIG_NUMA_EMU
320 if(!strncmp(opt, "fake=", 5)) {
321 numa_fake = simple_strtoul(opt+5,NULL,0); ;
322 if (numa_fake >= MAX_NUMNODES)
323 numa_fake = MAX_NUMNODES;
324 }
325#endif
326#ifdef CONFIG_ACPI_NUMA
327 if (!strncmp(opt,"noacpi",6))
328 acpi_numa = -1;
329#endif
330 return 1;
331}
332
/* NUMA lookup tables exported for use by modules. */
EXPORT_SYMBOL(cpu_to_node);
EXPORT_SYMBOL(node_to_cpumask);
EXPORT_SYMBOL(memnode_shift);
EXPORT_SYMBOL(memnodemap);
EXPORT_SYMBOL(node_data);