/* arch/x86_64/mm/srat.c (blame-view scrape residue removed) */
1da177e4
LT
1/*
2 * ACPI 3.0 based NUMA setup
3 * Copyright 2004 Andi Kleen, SuSE Labs.
4 *
5 * Reads the ACPI SRAT table to figure out what memory belongs to which CPUs.
6 *
7 * Called from acpi_numa_init while reading the SRAT and SLIT tables.
8 * Assumes all memory regions belonging to a single proximity domain
9 * are in one chunk. Holes between them will be included in the node.
10 */
11
12#include <linux/kernel.h>
13#include <linux/acpi.h>
14#include <linux/mmzone.h>
15#include <linux/bitmap.h>
16#include <linux/module.h>
17#include <linux/topology.h>
68a3a7fe
AK
18#include <linux/bootmem.h>
19#include <linux/mm.h>
1da177e4
LT
20#include <asm/proto.h>
21#include <asm/numa.h>
8a6fdd3e 22#include <asm/e820.h>
1da177e4 23
68a3a7fe
AK
24#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
25 defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \
26 && !defined(CONFIG_MEMORY_HOTPLUG)
27#define RESERVE_HOTADD 1
28#endif
29
1da177e4
LT
/* SLIT kept for __node_distance(); NULL when no valid SLIT was found. */
static struct acpi_table_slit *acpi_slit;

/* Nodes whose memory ranges were seen while parsing the SRAT. */
static nodemask_t nodes_parsed __initdata;
/* Node ids already handed out by setup_node(). */
static nodemask_t nodes_found __initdata;
static struct bootnode nodes[MAX_NUMNODES] __initdata;
/* Per-node hot-add (not yet populated) address range. */
static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
static int found_add_area __initdata;
/* Percentage of RAM the hot-add areas' struct page arrays may consume. */
int hotadd_percent __initdata = 10;
/* Proximity domain -> node id map; 0xff means unassigned. */
static u8 pxm2node[256] = { [0 ... 255] = 0xff };
1da177e4 39
9391a3f9
AK
/* Too small nodes confuse the VM badly. Usually they result
   from BIOS bugs. */
#define NODE_MIN_SIZE (4*1024*1024)

/* Forward declaration; defined near the end of this file. */
static int node_to_pxm(int n);
45
69e1a33f
AK
46int pxm_to_node(int pxm)
47{
48 if ((unsigned)pxm >= 256)
e4e94072
AK
49 return -1;
50 /* Extend 0xff to (int)-1 */
51 return (signed char)pxm2node[pxm];
69e1a33f
AK
52}
53
1da177e4
LT
54static __init int setup_node(int pxm)
55{
56 unsigned node = pxm2node[pxm];
57 if (node == 0xff) {
58 if (nodes_weight(nodes_found) >= MAX_NUMNODES)
59 return -1;
60 node = first_unset_node(nodes_found);
61 node_set(node, nodes_found);
62 pxm2node[pxm] = node;
63 }
64 return pxm2node[pxm];
65}
66
67static __init int conflicting_nodes(unsigned long start, unsigned long end)
68{
69 int i;
4b6a455c 70 for_each_node_mask(i, nodes_parsed) {
abe059e7 71 struct bootnode *nd = &nodes[i];
1da177e4
LT
72 if (nd->start == nd->end)
73 continue;
74 if (nd->end > start && nd->start < end)
05d1fa4b 75 return i;
1da177e4 76 if (nd->end == end && nd->start == start)
05d1fa4b 77 return i;
1da177e4
LT
78 }
79 return -1;
80}
81
82static __init void cutoff_node(int i, unsigned long start, unsigned long end)
83{
abe059e7 84 struct bootnode *nd = &nodes[i];
68a3a7fe
AK
85
86 if (found_add_area)
87 return;
88
1da177e4
LT
89 if (nd->start < start) {
90 nd->start = start;
91 if (nd->end < nd->start)
92 nd->start = nd->end;
93 }
94 if (nd->end > end) {
1da177e4
LT
95 nd->end = end;
96 if (nd->start > nd->end)
97 nd->start = nd->end;
98 }
99}
100
101static __init void bad_srat(void)
102{
2bce2b54 103 int i;
1da177e4
LT
104 printk(KERN_ERR "SRAT: SRAT not used.\n");
105 acpi_numa = -1;
2bce2b54
AK
106 for (i = 0; i < MAX_LOCAL_APIC; i++)
107 apicid_to_node[i] = NUMA_NO_NODE;
68a3a7fe
AK
108 for (i = 0; i < MAX_NUMNODES; i++)
109 nodes_add[i].start = nodes[i].end = 0;
1da177e4
LT
110}
111
112static __init inline int srat_disabled(void)
113{
114 return numa_off || acpi_numa < 0;
115}
116
1584b89c
AK
117/*
118 * A lot of BIOS fill in 10 (= no distance) everywhere. This messes
119 * up the NUMA heuristics which wants the local node to have a smaller
120 * distance than the others.
121 * Do some quick checks here and only use the SLIT if it passes.
122 */
123static __init int slit_valid(struct acpi_table_slit *slit)
124{
125 int i, j;
126 int d = slit->localities;
127 for (i = 0; i < d; i++) {
128 for (j = 0; j < d; j++) {
129 u8 val = slit->entry[d*i + j];
130 if (i == j) {
131 if (val != 10)
132 return 0;
133 } else if (val <= 10)
134 return 0;
135 }
136 }
137 return 1;
138}
139
1da177e4
LT
140/* Callback for SLIT parsing */
141void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
142{
1584b89c
AK
143 if (!slit_valid(slit)) {
144 printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n");
145 return;
146 }
1da177e4
LT
147 acpi_slit = slit;
148}
149
150/* Callback for Proximity Domain -> LAPIC mapping */
151void __init
152acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
153{
154 int pxm, node;
d22fe808
AK
155 if (srat_disabled())
156 return;
157 if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) { bad_srat();
158 return;
159 }
160 if (pa->flags.enabled == 0)
1da177e4
LT
161 return;
162 pxm = pa->proximity_domain;
163 node = setup_node(pxm);
164 if (node < 0) {
165 printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm);
166 bad_srat();
167 return;
168 }
0b07e984 169 apicid_to_node[pa->apic_id] = node;
1da177e4 170 acpi_numa = 1;
0b07e984
AK
171 printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
172 pxm, pa->apic_id, node);
1da177e4
LT
173}
174
68a3a7fe
AK
#ifdef RESERVE_HOTADD
/*
 * Protect against too large hotadd areas that would fill up memory.
 * Returns 1 when the (possibly trimmed) area in *nd is acceptable,
 * 0 when the hot-add budget is exhausted or no free e820 space exists
 * for its struct page array. May shrink nd->end to fit the budget.
 */
static int hotadd_enough_memory(struct bootnode *nd)
{
	/* Running totals across all hot-add areas processed so far. */
	static unsigned long allocated;
	static unsigned long last_area_end;
	unsigned long pages = (nd->end - nd->start) >> PAGE_SHIFT;
	long mem = pages * sizeof(struct page);	/* struct page cost in bytes */
	unsigned long addr;
	unsigned long allowed;
	unsigned long oldpages = pages;

	/* Signed overflow here means an absurdly large area; reject it. */
	if (mem < 0)
		return 0;
	/* Budget: hotadd_percent% of the real (non-hole) RAM, in bytes. */
	allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE;
	allowed = (allowed / 100) * hotadd_percent;
	if (allocated + mem > allowed) {
		/* Give them at least part of their hotadd memory upto hotadd_percent
		   It would be better to spread the limit out
		   over multiple hotplug areas, but that is too complicated
		   right now */
		if (allocated >= allowed)
			return 0;
		/* NOTE(review): the "+ mem" term lets this grant exceed the
		   remaining budget (allowed - allocated); looks like it should
		   be dropped — confirm intent before changing. */
		pages = (allowed - allocated + mem) / sizeof(struct page);
		mem = pages * sizeof(struct page);
		nd->end = nd->start + pages*PAGE_SIZE;
	}
	/* Not completely fool proof, but a good sanity check */
	addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem);
	if (addr == -1UL)
		return 0;
	if (pages != oldpages)
		printk(KERN_NOTICE "SRAT: Hotadd area limited to %lu bytes\n",
			pages << PAGE_SHIFT);
	last_area_end = addr + mem;
	allocated += mem;
	return 1;
}
215
/*
 * It is fine to add this area to the nodes data it will be used later
 * This code supports one contiguous hot add area per node.
 * Returns 0 on success, -1 when the area is rejected (too small,
 * overlapping existing RAM, or over the hot-add budget).
 */
static int reserve_hotadd(int node, unsigned long start, unsigned long end)
{
	unsigned long s_pfn = start >> PAGE_SHIFT;
	unsigned long e_pfn = end >> PAGE_SHIFT;
	int changed = 0;
	struct bootnode *nd = &nodes_add[node];

	/* I had some trouble with strange memory hotadd regions breaking
	   the boot. Be very strict here and reject anything unexpected.
	   If you want working memory hotadd write correct SRATs.

	   The node size check is a basic sanity check to guard against
	   mistakes */
	if ((signed long)(end - start) < NODE_MIN_SIZE) {
		printk(KERN_ERR "SRAT: Hotplug area too small\n");
		return -1;
	}

	/* This check might be a bit too strict, but I'm keeping it for now. */
	/* The whole range must be an e820 hole, i.e. no present memory. */
	if (e820_hole_size(s_pfn, e_pfn) != e_pfn - s_pfn) {
		printk(KERN_ERR "SRAT: Hotplug area has existing memory\n");
		return -1;
	}

	/* NOTE(review): this budgets the previously accumulated range in
	   nodes_add[node], not the incoming [start,end) — confirm intent. */
	if (!hotadd_enough_memory(&nodes_add[node])) {
		printk(KERN_ERR "SRAT: Hotplug area too large\n");
		return -1;
	}

	/* Looks good */

	found_add_area = 1;
	/* Merge [start,end) into the node's single hot-add range; only
	   ranges adjoining the existing one can extend it. */
	if (nd->start == nd->end) {
		nd->start = start;
		nd->end = end;
		changed = 1;
	} else {
		if (nd->start == end) {
			nd->start = start;
			changed = 1;
		}
		if (nd->end == start) {
			nd->end = end;
			changed = 1;
		}
		if (!changed)
			printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
	}

	/* Extend the memory map to cover the hot-add area. */
	if ((nd->end >> PAGE_SHIFT) > end_pfn)
		end_pfn = nd->end >> PAGE_SHIFT;

	if (changed)
		printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
	return 0;
}
#endif
277
1da177e4
LT
/*
 * Callback for parsing of the Proximity Domain <-> Memory Area mappings.
 * Merges the entry's [start, end) range into the node for its PXM,
 * rejecting the SRAT on conflicts with other nodes, and hands
 * hot-pluggable ranges to the hot-add machinery when configured.
 */
void __init
acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
{
	struct bootnode *nd, oldnode;
	unsigned long start, end;
	int node, pxm;
	int i;

	if (srat_disabled())
		return;
	/* A malformed entry invalidates the whole SRAT. */
	if (ma->header.length != sizeof(struct acpi_table_memory_affinity)) {
		bad_srat();
		return;
	}
	if (ma->flags.enabled == 0)
		return;
	/* hotadd_percent == 0 disables hot-add handling entirely. */
	if (ma->flags.hot_pluggable && hotadd_percent == 0)
		return;
	/* Assemble the 64-bit range from the split lo/hi table fields. */
	start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
	end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
	pxm = ma->proximity_domain;
	node = setup_node(pxm);
	if (node < 0) {
		printk(KERN_ERR "SRAT: Too many proximity domains.\n");
		bad_srat();
		return;
	}
	/* Self-overlap is only warned about; overlap with another node
	   is fatal for the whole SRAT. */
	i = conflicting_nodes(start, end);
	if (i == node) {
		printk(KERN_WARNING
		"SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n",
			pxm, start, end, nodes[i].start, nodes[i].end);
	} else if (i >= 0) {
		printk(KERN_ERR
		       "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n",
		       pxm, start, end, node_to_pxm(i),
			nodes[i].start, nodes[i].end);
		bad_srat();
		return;
	}
	nd = &nodes[node];
	/* Snapshot for undo if the hot-add reservation is rejected below. */
	oldnode = *nd;
	if (!node_test_and_set(node, nodes_parsed)) {
		/* First range for this node */
		nd->start = start;
		nd->end = end;
	} else {
		/* Grow the node to span the new range as well. */
		if (start < nd->start)
			nd->start = start;
		if (nd->end < end)
			nd->end = end;
	}

	printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
	       nd->start, nd->end);

#ifdef RESERVE_HOTADD
	if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
		/* Ignore hotadd region. Undo damage */
		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
		*nd = oldnode;
		if ((nd->start | nd->end) == 0)
			node_clear(node, nodes_parsed);
	}
#endif
}
344
8a6fdd3e
AK
345/* Sanity check to catch more bad SRATs (they are amazingly common).
346 Make sure the PXMs cover all memory. */
347static int nodes_cover_memory(void)
348{
349 int i;
350 unsigned long pxmram, e820ram;
351
352 pxmram = 0;
353 for_each_node_mask(i, nodes_parsed) {
354 unsigned long s = nodes[i].start >> PAGE_SHIFT;
355 unsigned long e = nodes[i].end >> PAGE_SHIFT;
356 pxmram += e - s;
357 pxmram -= e820_hole_size(s, e);
68a3a7fe
AK
358 pxmram -= nodes_add[i].end - nodes_add[i].start;
359 if ((long)pxmram < 0)
360 pxmram = 0;
8a6fdd3e
AK
361 }
362
363 e820ram = end_pfn - e820_hole_size(0, end_pfn);
fdb9df94
AK
364 /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
365 if ((long)(e820ram - pxmram) >= 1*1024*1024) {
8a6fdd3e
AK
366 printk(KERN_ERR
367 "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
368 (pxmram << PAGE_SHIFT) >> 20,
369 (e820ram << PAGE_SHIFT) >> 20);
370 return 0;
371 }
372 return 1;
373}
374
9391a3f9
AK
375static void unparse_node(int node)
376{
377 int i;
378 node_clear(node, nodes_parsed);
379 for (i = 0; i < MAX_LOCAL_APIC; i++) {
380 if (apicid_to_node[i] == node)
381 apicid_to_node[i] = NUMA_NO_NODE;
382 }
383}
384
1da177e4
LT
/* Called from the generic ACPI NUMA code; nothing to fix up on x86-64. */
void __init acpi_numa_arch_fixup(void) {}
386
/* Use the information discovered above to actually set up the nodes.
   Returns 0 on success, -1 when SRAT data was unusable. */
int __init acpi_scan_nodes(unsigned long start, unsigned long end)
{
	int i;

	/* First clean up the node list: clamp each node to [start,end)
	   and drop nodes that end up smaller than NODE_MIN_SIZE. */
	for (i = 0; i < MAX_NUMNODES; i++) {
		cutoff_node(i, start, end);
		if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE)
			unparse_node(i);
	}

	/* Bail out when SRAT parsing failed or found nothing usable. */
	if (acpi_numa <= 0)
		return -1;

	/* Reject SRATs whose PXMs do not cover (almost) all e820 RAM. */
	if (!nodes_cover_memory()) {
		bad_srat();
		return -1;
	}

	memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
	if (memnode_shift < 0) {
		printk(KERN_ERR
		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
		bad_srat();
		return -1;
	}

	/* Finally register nodes */
	for_each_node_mask(i, nodes_parsed)
		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
	/* Try again in case setup_node_bootmem missed one due
	   to missing bootmem */
	for_each_node_mask(i, nodes_parsed)
		if (!node_online(i))
			setup_node_bootmem(i, nodes[i].start, nodes[i].end);

	/* Detach CPUs whose node was dropped above. */
	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_to_node[i] == NUMA_NO_NODE)
			continue;
		if (!node_isset(cpu_to_node[i], nodes_parsed))
			numa_set_node(i, NUMA_NO_NODE);
	}
	numa_init_array();
	return 0;
}
433
05d1fa4b 434static int node_to_pxm(int n)
1da177e4
LT
435{
436 int i;
437 if (pxm2node[n] == n)
438 return n;
439 for (i = 0; i < 256; i++)
440 if (pxm2node[i] == n)
441 return i;
442 return 0;
443}
444
68a3a7fe
AK
445void __init srat_reserve_add_area(int nodeid)
446{
447 if (found_add_area && nodes_add[nodeid].end) {
448 u64 total_mb;
449
450 printk(KERN_INFO "SRAT: Reserving hot-add memory space "
451 "for node %d at %Lx-%Lx\n",
452 nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
453 total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
454 >> PAGE_SHIFT;
455 total_mb *= sizeof(struct page);
456 total_mb >>= 20;
457 printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
458 "pre-allocated memory.\n", (unsigned long long)total_mb);
459 reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
460 nodes_add[nodeid].end - nodes_add[nodeid].start);
461 }
462}
463
1da177e4
LT
464int __node_distance(int a, int b)
465{
466 int index;
467
468 if (!acpi_slit)
469 return a == b ? 10 : 20;
470 index = acpi_slit->localities * node_to_pxm(a);
471 return acpi_slit->entry[index + node_to_pxm(b)];
472}
473
474EXPORT_SYMBOL(__node_distance);