]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * acpi_numa.c - ACPI NUMA support | |
3 | * | |
4 | * Copyright (C) 2002 Takayoshi Kochi <t-kochi@bq.jp.nec.com> | |
5 | * | |
6 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
19 | * | |
20 | */ | |
21 | #include <linux/module.h> | |
22 | #include <linux/init.h> | |
23 | #include <linux/kernel.h> | |
24 | #include <linux/types.h> | |
25 | #include <linux/errno.h> | |
26 | #include <linux/acpi.h> | |
27 | #include <linux/numa.h> | |
28 | #include <linux/nodemask.h> | |
29 | #include <linux/topology.h> | |
30 | ||
/* Prefix placed in front of the printk() messages emitted by this file. */
#define PREFIX "ACPI: "

/*
 * Component ID and module name for this file.
 * NOTE(review): presumably consumed by the ACPICA debug macros
 * (ACPI_DEBUG_PRINT / ACPI_FUNCTION_NAME) used further down - confirm.
 */
#define ACPI_NUMA 0x80000000
#define _COMPONENT ACPI_NUMA
ACPI_MODULE_NAME("numa");

/*
 * Logical node IDs that acpi_map_pxm_to_node() has already assigned to a
 * proximity domain; starts out empty.
 */
static nodemask_t nodes_found_map = NODE_MASK_NONE;

/*
 * Maps to convert between proximity domain and logical node ID.
 * Every slot starts out "unmapped": NUMA_NO_NODE in the pxm -> node table
 * and PXM_INVAL in the node -> pxm table.
 */
static int pxm_to_node_map[MAX_PXM_DOMAINS]
	= { [0 ... MAX_PXM_DOMAINS - 1] = NUMA_NO_NODE };
static int node_to_pxm_map[MAX_NUMNODES]
	= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };

/* Revision field of the SRAT header, stored by acpi_parse_srat(). */
unsigned char acpi_srat_revision __initdata;
46 | ||
47 | int pxm_to_node(int pxm) | |
48 | { | |
49 | if (pxm < 0) | |
50 | return NUMA_NO_NODE; | |
51 | return pxm_to_node_map[pxm]; | |
52 | } | |
53 | ||
54 | int node_to_pxm(int node) | |
55 | { | |
56 | if (node < 0) | |
57 | return PXM_INVAL; | |
58 | return node_to_pxm_map[node]; | |
59 | } | |
60 | ||
61 | static void __acpi_map_pxm_to_node(int pxm, int node) | |
62 | { | |
63 | if (pxm_to_node_map[pxm] == NUMA_NO_NODE || node < pxm_to_node_map[pxm]) | |
64 | pxm_to_node_map[pxm] = node; | |
65 | if (node_to_pxm_map[node] == PXM_INVAL || pxm < node_to_pxm_map[node]) | |
66 | node_to_pxm_map[node] = pxm; | |
67 | } | |
68 | ||
69 | int acpi_map_pxm_to_node(int pxm) | |
70 | { | |
71 | int node; | |
72 | ||
73 | if (pxm < 0 || pxm >= MAX_PXM_DOMAINS) | |
74 | return NUMA_NO_NODE; | |
75 | ||
76 | node = pxm_to_node_map[pxm]; | |
77 | ||
78 | if (node == NUMA_NO_NODE) { | |
79 | if (nodes_weight(nodes_found_map) >= MAX_NUMNODES) | |
80 | return NUMA_NO_NODE; | |
81 | node = first_unset_node(nodes_found_map); | |
82 | __acpi_map_pxm_to_node(pxm, node); | |
83 | node_set(node, nodes_found_map); | |
84 | } | |
85 | ||
86 | return node; | |
87 | } | |
88 | ||
89 | /** | |
90 | * acpi_map_pxm_to_online_node - Map proximity ID to online node | |
91 | * @pxm: ACPI proximity ID | |
92 | * | |
93 | * This is similar to acpi_map_pxm_to_node(), but always returns an online | |
94 | * node. When the mapped node from a given proximity ID is offline, it | |
95 | * looks up the node distance table and returns the nearest online node. | |
96 | * | |
97 | * ACPI device drivers, which are called after the NUMA initialization has | |
98 | * completed in the kernel, can call this interface to obtain their device | |
99 | * NUMA topology from ACPI tables. Such drivers do not have to deal with | |
100 | * offline nodes. A node may be offline when a device proximity ID is | |
101 | * unique, SRAT memory entry does not exist, or NUMA is disabled, ex. | |
102 | * "numa=off" on x86. | |
103 | */ | |
104 | int acpi_map_pxm_to_online_node(int pxm) | |
105 | { | |
106 | int node, n, dist, min_dist; | |
107 | ||
108 | node = acpi_map_pxm_to_node(pxm); | |
109 | ||
110 | if (node == NUMA_NO_NODE) | |
111 | node = 0; | |
112 | ||
113 | if (!node_online(node)) { | |
114 | min_dist = INT_MAX; | |
115 | for_each_online_node(n) { | |
116 | dist = node_distance(node, n); | |
117 | if (dist < min_dist) { | |
118 | min_dist = dist; | |
119 | node = n; | |
120 | } | |
121 | } | |
122 | } | |
123 | ||
124 | return node; | |
125 | } | |
126 | EXPORT_SYMBOL(acpi_map_pxm_to_online_node); | |
127 | ||
128 | static void __init | |
129 | acpi_table_print_srat_entry(struct acpi_subtable_header *header) | |
130 | { | |
131 | ||
132 | ACPI_FUNCTION_NAME("acpi_table_print_srat_entry"); | |
133 | ||
134 | if (!header) | |
135 | return; | |
136 | ||
137 | switch (header->type) { | |
138 | ||
139 | case ACPI_SRAT_TYPE_CPU_AFFINITY: | |
140 | #ifdef ACPI_DEBUG_OUTPUT | |
141 | { | |
142 | struct acpi_srat_cpu_affinity *p = | |
143 | (struct acpi_srat_cpu_affinity *)header; | |
144 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | |
145 | "SRAT Processor (id[0x%02x] eid[0x%02x]) in proximity domain %d %s\n", | |
146 | p->apic_id, p->local_sapic_eid, | |
147 | p->proximity_domain_lo, | |
148 | (p->flags & ACPI_SRAT_CPU_ENABLED)? | |
149 | "enabled" : "disabled")); | |
150 | } | |
151 | #endif /* ACPI_DEBUG_OUTPUT */ | |
152 | break; | |
153 | ||
154 | case ACPI_SRAT_TYPE_MEMORY_AFFINITY: | |
155 | #ifdef ACPI_DEBUG_OUTPUT | |
156 | { | |
157 | struct acpi_srat_mem_affinity *p = | |
158 | (struct acpi_srat_mem_affinity *)header; | |
159 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | |
160 | "SRAT Memory (0x%lx length 0x%lx) in proximity domain %d %s%s%s\n", | |
161 | (unsigned long)p->base_address, | |
162 | (unsigned long)p->length, | |
163 | p->proximity_domain, | |
164 | (p->flags & ACPI_SRAT_MEM_ENABLED)? | |
165 | "enabled" : "disabled", | |
166 | (p->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)? | |
167 | " hot-pluggable" : "", | |
168 | (p->flags & ACPI_SRAT_MEM_NON_VOLATILE)? | |
169 | " non-volatile" : "")); | |
170 | } | |
171 | #endif /* ACPI_DEBUG_OUTPUT */ | |
172 | break; | |
173 | ||
174 | case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: | |
175 | #ifdef ACPI_DEBUG_OUTPUT | |
176 | { | |
177 | struct acpi_srat_x2apic_cpu_affinity *p = | |
178 | (struct acpi_srat_x2apic_cpu_affinity *)header; | |
179 | ACPI_DEBUG_PRINT((ACPI_DB_INFO, | |
180 | "SRAT Processor (x2apicid[0x%08x]) in" | |
181 | " proximity domain %d %s\n", | |
182 | p->apic_id, | |
183 | p->proximity_domain, | |
184 | (p->flags & ACPI_SRAT_CPU_ENABLED) ? | |
185 | "enabled" : "disabled")); | |
186 | } | |
187 | #endif /* ACPI_DEBUG_OUTPUT */ | |
188 | break; | |
189 | default: | |
190 | printk(KERN_WARNING PREFIX | |
191 | "Found unsupported SRAT entry (type = 0x%x)\n", | |
192 | header->type); | |
193 | break; | |
194 | } | |
195 | } | |
196 | ||
197 | /* | |
198 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes | |
199 | * up the NUMA heuristics which wants the local node to have a smaller | |
200 | * distance than the others. | |
201 | * Do some quick checks here and only use the SLIT if it passes. | |
202 | */ | |
203 | static int __init slit_valid(struct acpi_table_slit *slit) | |
204 | { | |
205 | int i, j; | |
206 | int d = slit->locality_count; | |
207 | for (i = 0; i < d; i++) { | |
208 | for (j = 0; j < d; j++) { | |
209 | u8 val = slit->entry[d*i + j]; | |
210 | if (i == j) { | |
211 | if (val != LOCAL_DISTANCE) | |
212 | return 0; | |
213 | } else if (val <= LOCAL_DISTANCE) | |
214 | return 0; | |
215 | } | |
216 | } | |
217 | return 1; | |
218 | } | |
219 | ||
220 | static int __init acpi_parse_slit(struct acpi_table_header *table) | |
221 | { | |
222 | struct acpi_table_slit *slit = (struct acpi_table_slit *)table; | |
223 | ||
224 | if (!slit_valid(slit)) { | |
225 | printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n"); | |
226 | return -EINVAL; | |
227 | } | |
228 | acpi_numa_slit_init(slit); | |
229 | ||
230 | return 0; | |
231 | } | |
232 | ||
233 | void __init __weak | |
234 | acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |
235 | { | |
236 | printk(KERN_WARNING PREFIX | |
237 | "Found unsupported x2apic [0x%08x] SRAT entry\n", pa->apic_id); | |
238 | return; | |
239 | } | |
240 | ||
241 | ||
242 | static int __init | |
243 | acpi_parse_x2apic_affinity(struct acpi_subtable_header *header, | |
244 | const unsigned long end) | |
245 | { | |
246 | struct acpi_srat_x2apic_cpu_affinity *processor_affinity; | |
247 | ||
248 | processor_affinity = (struct acpi_srat_x2apic_cpu_affinity *)header; | |
249 | if (!processor_affinity) | |
250 | return -EINVAL; | |
251 | ||
252 | acpi_table_print_srat_entry(header); | |
253 | ||
254 | /* let architecture-dependent part to do it */ | |
255 | acpi_numa_x2apic_affinity_init(processor_affinity); | |
256 | ||
257 | return 0; | |
258 | } | |
259 | ||
260 | static int __init | |
261 | acpi_parse_processor_affinity(struct acpi_subtable_header *header, | |
262 | const unsigned long end) | |
263 | { | |
264 | struct acpi_srat_cpu_affinity *processor_affinity; | |
265 | ||
266 | processor_affinity = (struct acpi_srat_cpu_affinity *)header; | |
267 | if (!processor_affinity) | |
268 | return -EINVAL; | |
269 | ||
270 | acpi_table_print_srat_entry(header); | |
271 | ||
272 | /* let architecture-dependent part to do it */ | |
273 | acpi_numa_processor_affinity_init(processor_affinity); | |
274 | ||
275 | return 0; | |
276 | } | |
277 | ||
278 | static int __initdata parsed_numa_memblks; | |
279 | ||
280 | static int __init | |
281 | acpi_parse_memory_affinity(struct acpi_subtable_header * header, | |
282 | const unsigned long end) | |
283 | { | |
284 | struct acpi_srat_mem_affinity *memory_affinity; | |
285 | ||
286 | memory_affinity = (struct acpi_srat_mem_affinity *)header; | |
287 | if (!memory_affinity) | |
288 | return -EINVAL; | |
289 | ||
290 | acpi_table_print_srat_entry(header); | |
291 | ||
292 | /* let architecture-dependent part to do it */ | |
293 | if (!acpi_numa_memory_affinity_init(memory_affinity)) | |
294 | parsed_numa_memblks++; | |
295 | return 0; | |
296 | } | |
297 | ||
298 | static int __init acpi_parse_srat(struct acpi_table_header *table) | |
299 | { | |
300 | struct acpi_table_srat *srat = (struct acpi_table_srat *)table; | |
301 | ||
302 | acpi_srat_revision = srat->header.revision; | |
303 | ||
304 | /* Real work done in acpi_table_parse_srat below. */ | |
305 | ||
306 | return 0; | |
307 | } | |
308 | ||
309 | static int __init | |
310 | acpi_table_parse_srat(enum acpi_srat_type id, | |
311 | acpi_tbl_entry_handler handler, unsigned int max_entries) | |
312 | { | |
313 | return acpi_table_parse_entries(ACPI_SIG_SRAT, | |
314 | sizeof(struct acpi_table_srat), id, | |
315 | handler, max_entries); | |
316 | } | |
317 | ||
318 | int __init acpi_numa_init(void) | |
319 | { | |
320 | int cnt = 0; | |
321 | ||
322 | /* | |
323 | * Should not limit number with cpu num that is from NR_CPUS or nr_cpus= | |
324 | * SRAT cpu entries could have different order with that in MADT. | |
325 | * So go over all cpu entries in SRAT to get apicid to node mapping. | |
326 | */ | |
327 | ||
328 | /* SRAT: Static Resource Affinity Table */ | |
329 | if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { | |
330 | acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, | |
331 | acpi_parse_x2apic_affinity, 0); | |
332 | acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, | |
333 | acpi_parse_processor_affinity, 0); | |
334 | cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, | |
335 | acpi_parse_memory_affinity, | |
336 | NR_NODE_MEMBLKS); | |
337 | } | |
338 | ||
339 | /* SLIT: System Locality Information Table */ | |
340 | acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit); | |
341 | ||
342 | acpi_numa_arch_fixup(); | |
343 | ||
344 | if (cnt < 0) | |
345 | return cnt; | |
346 | else if (!parsed_numa_memblks) | |
347 | return -ENOENT; | |
348 | return 0; | |
349 | } | |
350 | ||
351 | static int acpi_get_pxm(acpi_handle h) | |
352 | { | |
353 | unsigned long long pxm; | |
354 | acpi_status status; | |
355 | acpi_handle handle; | |
356 | acpi_handle phandle = h; | |
357 | ||
358 | do { | |
359 | handle = phandle; | |
360 | status = acpi_evaluate_integer(handle, "_PXM", NULL, &pxm); | |
361 | if (ACPI_SUCCESS(status)) | |
362 | return pxm; | |
363 | status = acpi_get_parent(handle, &phandle); | |
364 | } while (ACPI_SUCCESS(status)); | |
365 | return -1; | |
366 | } | |
367 | ||
368 | int acpi_get_node(acpi_handle handle) | |
369 | { | |
370 | int pxm; | |
371 | ||
372 | pxm = acpi_get_pxm(handle); | |
373 | ||
374 | return acpi_map_pxm_to_node(pxm); | |
375 | } | |
376 | EXPORT_SYMBOL(acpi_get_node); |