/*
 * arch/x86/mm/numa_64.c
 *
 * Generic VM initialization for x86-64 NUMA setups.
 * Copyright 2002,2003 Andi Kleen, SuSE Labs.
 */
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/nodemask.h>
#include <linux/sched.h>

#include <asm/proto.h>
#include <asm/amd_nb.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);

struct memnode memnode;

s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
        [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};

static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;

/*
 * Map cpu index to node index
 */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
/*
 * Given a shift value, try to populate memnodemap[].
 * Returns:
 *  1 if OK
 *  0 if memnodemap[] too small (or shift too small)
 * -1 if node overlap or lost ram (shift too big)
 */
static int __init populate_memnodemap(const struct bootnode *nodes,
                                      int numnodes, int shift, int *nodeids)
{
        unsigned long addr, end;
        int i, res = -1;

        memset(memnodemap, 0xff, sizeof(s16) * memnodemapsize);
        for (i = 0; i < numnodes; i++) {
                addr = nodes[i].start;
                end = nodes[i].end;
                if (addr >= end)
                        continue;
                if ((end >> shift) >= memnodemapsize)
                        return 0;
                do {
                        if (memnodemap[addr >> shift] != NUMA_NO_NODE)
                                return -1;

                        if (!nodeids)
                                memnodemap[addr >> shift] = i;
                        else
                                memnodemap[addr >> shift] = nodeids[i];

                        addr += (1UL << shift);
                } while (addr < end);
                res = 1;
        }
        return res;
}
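/*
 * Illustrative sketch (not part of the original file): once memnodemap[]
 * is populated, translating a physical address to a node id is a single
 * shift and table lookup.  phys_to_nid() in <asm/mmzone_64.h> does roughly:
 *
 *      static inline int phys_to_nid_sketch(unsigned long paddr)
 *      {
 *              return memnodemap[paddr >> memnode_shift];
 *      }
 *
 * which is why compute_hash_shift() below looks for the largest shift that
 * still maps every node boundary to a distinct table slot.
 */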
static int __init allocate_cachealigned_memnodemap(void)
{
        unsigned long addr;

        memnodemap = memnode.embedded_map;
        if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
                return 0;

        addr = 0x8000;
        nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
        nodemap_addr = memblock_find_in_range(addr, max_pfn << PAGE_SHIFT,
                                              nodemap_size, L1_CACHE_BYTES);
        if (nodemap_addr == MEMBLOCK_ERROR) {
                printk(KERN_ERR
                       "NUMA: Unable to allocate Memory to Node hash map\n");
                nodemap_addr = nodemap_size = 0;
                return -1;
        }
        memnodemap = phys_to_virt(nodemap_addr);
        memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size,
                                   "MEMNODEMAP");

        printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
               nodemap_addr, nodemap_addr + nodemap_size);
        return 0;
}
/*
 * The LSB of all start and end addresses in the node map is the value of the
 * maximum possible shift.
 */
static int __init extract_lsb_from_nodes(const struct bootnode *nodes,
                                         int numnodes)
{
        int i, nodes_used = 0;
        unsigned long start, end;
        unsigned long bitfield = 0, memtop = 0;

        for (i = 0; i < numnodes; i++) {
                start = nodes[i].start;
                end = nodes[i].end;
                if (start >= end)
                        continue;
                bitfield |= start;
                nodes_used++;
                if (end > memtop)
                        memtop = end;
        }
        if (nodes_used <= 1)
                i = 63;
        else
                i = find_first_bit(&bitfield, sizeof(unsigned long) * 8);
        memnodemapsize = (memtop >> i) + 1;
        return i;
}
int __init compute_hash_shift(struct bootnode *nodes, int numnodes,
                              int *nodeids)
{
        int shift;

        shift = extract_lsb_from_nodes(nodes, numnodes);
        if (allocate_cachealigned_memnodemap())
                return -1;
        printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift);

        if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) {
                printk(KERN_INFO "Your memory is not aligned you need to "
                       "rebuild your kernel with a bigger NODEMAPSIZE "
                       "shift=%d\n", shift);
                return -1;
        }
        return shift;
}
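/*
 * Worked example (illustrative, not in the original file): with two nodes
 * covering [0, 4GB) and [4GB, 8GB), OR-ing the start addresses gives
 * 0x100000000, whose lowest set bit is bit 32.  The hash shift is therefore
 * 32 and memnodemapsize = (8GB >> 32) + 1 = 3 entries.  Poorly aligned node
 * boundaries force a smaller shift and a correspondingly larger table.
 */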
int __meminit __early_pfn_to_nid(unsigned long pfn)
{
        return phys_to_nid(pfn << PAGE_SHIFT);
}
static void * __init early_node_mem(int nodeid, unsigned long start,
                                    unsigned long end, unsigned long size,
                                    unsigned long align)
{
        unsigned long mem;

        /*
         * Put it as high as possible; other early per-node data
         * (such as NODE_DATA) will go there as well.
         */
        if (start < (MAX_DMA_PFN << PAGE_SHIFT))
                start = MAX_DMA_PFN << PAGE_SHIFT;
        if (start < (MAX_DMA32_PFN << PAGE_SHIFT) &&
            end > (MAX_DMA32_PFN << PAGE_SHIFT))
                start = MAX_DMA32_PFN << PAGE_SHIFT;
        mem = memblock_x86_find_in_range_node(nodeid, start, end, size, align);
        if (mem != MEMBLOCK_ERROR)
                return __va(mem);

        /* extend the search scope */
        end = max_pfn_mapped << PAGE_SHIFT;
        start = MAX_DMA_PFN << PAGE_SHIFT;
        mem = memblock_find_in_range(start, end, size, align);
        if (mem != MEMBLOCK_ERROR)
                return __va(mem);

        printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
               size, nodeid);

        return NULL;
}
/* Initialize bootmem allocator for a node */
void __init
setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
{
        unsigned long start_pfn, last_pfn, nodedata_phys;
        const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
        int nid;

        if (!end)
                return;

        /*
         * Don't confuse VM with a node that doesn't have the
         * minimum amount of memory:
         */
        if (end && (end - start) < NODE_MIN_SIZE)
                return;

        start = roundup(start, ZONE_ALIGN);

        printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
               start, end);

        start_pfn = start >> PAGE_SHIFT;
        last_pfn = end >> PAGE_SHIFT;

        node_data[nodeid] = early_node_mem(nodeid, start, end, pgdat_size,
                                           SMP_CACHE_BYTES);
        if (node_data[nodeid] == NULL)
                return;
        nodedata_phys = __pa(node_data[nodeid]);
        memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size,
                                   "NODE_DATA");
        printk(KERN_INFO "  NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
               nodedata_phys + pgdat_size - 1);
        nid = phys_to_nid(nodedata_phys);
        if (nid != nodeid)
                printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nodeid, nid);

        memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
        NODE_DATA(nodeid)->node_id = nodeid;
        NODE_DATA(nodeid)->node_start_pfn = start_pfn;
        NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;

        node_set_online(nodeid);
}
/*
 * There are unfortunately some poorly designed mainboards around that
 * only connect memory to a single CPU.  This breaks the 1:1 cpu->node
 * mapping.  To avoid this, fill in the mapping for all possible CPUs,
 * as the number of CPUs is not known yet.  We round robin the existing
 * nodes.
 */
void __init numa_init_array(void)
{
        int rr, i;

        rr = first_node(node_online_map);
        for (i = 0; i < nr_cpu_ids; i++) {
                if (early_cpu_to_node(i) != NUMA_NO_NODE)
                        continue;
                numa_set_node(i, rr);
                rr = next_node(rr, node_online_map);
                if (rr == MAX_NUMNODES)
                        rr = first_node(node_online_map);
        }
}
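/*
 * Illustrative example (not in the original file): with two online nodes
 * {0, 1} and CPUs 0-3 still unmapped, the loop above assigns cpu0->node0,
 * cpu1->node1, cpu2->node0, cpu3->node1, so every possible CPU ends up on
 * some valid node even when the firmware only described one of them.
 */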
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
static struct bootnode nodes[MAX_NUMNODES] __initdata;
static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata;
static char *cmdline __initdata;

void __init numa_emu_cmdline(char *str)
{
        cmdline = str;
}
static int __init setup_physnodes(unsigned long start, unsigned long end,
                                  int acpi, int amd)
{
        int ret = 0;
        int i;

        memset(physnodes, 0, sizeof(physnodes));
#ifdef CONFIG_ACPI_NUMA
        if (acpi)
                acpi_get_nodes(physnodes, start, end);
#endif
#ifdef CONFIG_AMD_NUMA
        if (amd)
                amd_get_nodes(physnodes);
#endif
        /*
         * Basic sanity checking on the physical node map: there may be errors
         * if the SRAT or AMD code incorrectly reported the topology or the
         * mem= kernel parameter is used.
         */
        for (i = 0; i < MAX_NUMNODES; i++) {
                if (physnodes[i].start == physnodes[i].end)
                        continue;
                if (physnodes[i].start > end) {
                        physnodes[i].end = physnodes[i].start;
                        continue;
                }
                if (physnodes[i].end < start) {
                        physnodes[i].start = physnodes[i].end;
                        continue;
                }
                if (physnodes[i].start < start)
                        physnodes[i].start = start;
                if (physnodes[i].end > end)
                        physnodes[i].end = end;
                ret++;
        }

        /*
         * If no physical topology was detected, a single node is faked to
         * cover the entire address space.
         */
        if (!ret) {
                physnodes[ret].start = start;
                physnodes[ret].end = end;
                ret = 1;
        }
        return ret;
}
static void __init fake_physnodes(int acpi, int amd, int nr_nodes)
{
        int i;

        BUG_ON(acpi && amd);
#ifdef CONFIG_ACPI_NUMA
        if (acpi)
                acpi_fake_nodes(nodes, nr_nodes);
#endif
#ifdef CONFIG_AMD_NUMA
        if (amd)
                amd_fake_nodes(nodes, nr_nodes);
#endif
        if (!acpi && !amd)
                for (i = 0; i < nr_cpu_ids; i++)
                        numa_set_node(i, 0);
}
/*
 * Sets up nid to range from addr to addr + size.  If the end boundary is
 * greater than max_addr, then max_addr is used instead.  The return value
 * is 0 if there is additional memory left for allocation past addr and -1
 * otherwise.  addr is adjusted to be at the end of the node.
 */
static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr)
{
        int ret = 0;

        nodes[nid].start = *addr;
        *addr += size;
        if (*addr >= max_addr) {
                *addr = max_addr;
                ret = -1;
        }
        nodes[nid].end = *addr;
        node_set(nid, node_possible_map);
        printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid,
               nodes[nid].start, nodes[nid].end,
               (nodes[nid].end - nodes[nid].start) >> 20);
        return ret;
}
/*
 * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from
 * addr to max_addr.  The return value is the number of nodes allocated.
 */
static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes)
{
        nodemask_t physnode_mask = NODE_MASK_NONE;
        u64 size;
        int big;
        int ret = 0;
        int i;

        if (nr_nodes <= 0)
                return -1;
        if (nr_nodes > MAX_NUMNODES) {
                pr_info("numa=fake=%d too large, reducing to %d\n",
                        nr_nodes, MAX_NUMNODES);
                nr_nodes = MAX_NUMNODES;
        }

        size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
                                                                nr_nodes;
        /*
         * Calculate the number of big nodes that can be allocated as a result
         * of consolidating the remainder.
         */
        big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) /
                FAKE_NODE_MIN_SIZE;

        size &= FAKE_NODE_MIN_HASH_MASK;
        if (!size) {
                pr_err("Not enough memory for each node.  "
                       "NUMA emulation disabled.\n");
                return -1;
        }

        for (i = 0; i < MAX_NUMNODES; i++)
                if (physnodes[i].start != physnodes[i].end)
                        node_set(i, physnode_mask);

        /*
         * Continue to fill physical nodes with fake nodes until there is no
         * memory left on any of them.
         */
        while (nodes_weight(physnode_mask)) {
                for_each_node_mask(i, physnode_mask) {
                        u64 end = physnodes[i].start + size;
                        u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN);

                        if (ret < big)
                                end += FAKE_NODE_MIN_SIZE;

                        /*
                         * Continue to add memory to this fake node if its
                         * non-reserved memory is less than the per-node size.
                         */
                        while (end - physnodes[i].start -
                               memblock_x86_hole_size(physnodes[i].start, end) < size) {
                                end += FAKE_NODE_MIN_SIZE;
                                if (end > physnodes[i].end) {
                                        end = physnodes[i].end;
                                        break;
                                }
                        }

                        /*
                         * If there won't be at least FAKE_NODE_MIN_SIZE of
                         * non-reserved memory in ZONE_DMA32 for the next node,
                         * this one must extend to the boundary.
                         */
                        if (end < dma32_end && dma32_end - end -
                            memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
                                end = dma32_end;

                        /*
                         * If there won't be enough non-reserved memory for the
                         * next node, this one must extend to the end of the
                         * physical node.
                         */
                        if (physnodes[i].end - end -
                            memblock_x86_hole_size(end, physnodes[i].end) < size)
                                end = physnodes[i].end;

                        /*
                         * Avoid allocating more nodes than requested, which can
                         * happen as a result of rounding down each node's size
                         * to FAKE_NODE_MIN_SIZE.
                         */
                        if (nodes_weight(physnode_mask) + ret >= nr_nodes)
                                end = physnodes[i].end;

                        if (setup_node_range(ret++, &physnodes[i].start,
                                             end - physnodes[i].start,
                                             physnodes[i].end) < 0)
                                node_clear(i, physnode_mask);
                }
        }
        return ret;
}
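/*
 * Illustrative example (not in the original file): booting with numa=fake=4
 * on a box with 4GB of usable RAM in one physical node yields a per-node
 * size of 1GB, so four 1GB fake nodes are carved out back to back.  Any
 * per-node remainder below FAKE_NODE_MIN_SIZE is consolidated into a few
 * "big" nodes rather than being lost to rounding.
 */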
/*
 * Returns the end address of a node so that there is at least `size' amount
 * of non-reserved memory or `max_addr' is reached.
 */
static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size)
{
        u64 end = start + size;

        while (end - start - memblock_x86_hole_size(start, end) < size) {
                end += FAKE_NODE_MIN_SIZE;
                if (end > max_addr) {
                        end = max_addr;
                        break;
                }
        }
        return end;
}
/*
 * Sets up fake nodes of `size' interleaved over physical nodes ranging from
 * `addr' to `max_addr'.  The return value is the number of nodes allocated.
 */
static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size)
{
        nodemask_t physnode_mask = NODE_MASK_NONE;
        u64 min_size;
        int ret = 0;
        int i;

        if (!size)
                return -1;
        /*
         * The limit on emulated nodes is MAX_NUMNODES, so the size per node
         * is increased accordingly if the requested size is too small.  This
         * creates a uniform distribution of node sizes across the entire
         * machine (but not necessarily over physical nodes).
         */
        min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) /
                                                                MAX_NUMNODES;
        min_size = max(min_size, FAKE_NODE_MIN_SIZE);
        if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size)
                min_size = (min_size + FAKE_NODE_MIN_SIZE) &
                                                FAKE_NODE_MIN_HASH_MASK;
        if (size < min_size) {
                pr_err("Fake node size %LuMB too small, increasing to %LuMB\n",
                       size >> 20, min_size >> 20);
                size = min_size;
        }
        size &= FAKE_NODE_MIN_HASH_MASK;

        for (i = 0; i < MAX_NUMNODES; i++)
                if (physnodes[i].start != physnodes[i].end)
                        node_set(i, physnode_mask);
        /*
         * Fill physical nodes with fake nodes of size until there is no
         * memory left on any of them.
         */
        while (nodes_weight(physnode_mask)) {
                for_each_node_mask(i, physnode_mask) {
                        u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT;
                        u64 end;

                        end = find_end_of_node(physnodes[i].start,
                                               physnodes[i].end, size);
                        /*
                         * If there won't be at least FAKE_NODE_MIN_SIZE of
                         * non-reserved memory in ZONE_DMA32 for the next node,
                         * this one must extend to the boundary.
                         */
                        if (end < dma32_end && dma32_end - end -
                            memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE)
                                end = dma32_end;

                        /*
                         * If there won't be enough non-reserved memory for the
                         * next node, this one must extend to the end of the
                         * physical node.
                         */
                        if (physnodes[i].end - end -
                            memblock_x86_hole_size(end, physnodes[i].end) < size)
                                end = physnodes[i].end;

                        /*
                         * Setup the fake node that will be allocated as
                         * bootmem later.  If setup_node_range() returns
                         * non-zero, there is no more memory available on this
                         * physical node.
                         */
                        if (setup_node_range(ret++, &physnodes[i].start,
                                             end - physnodes[i].start,
                                             physnodes[i].end) < 0)
                                node_clear(i, physnode_mask);
                }
        }
        return ret;
}
/*
 * Sets up the system RAM area from start_pfn to last_pfn according to the
 * numa=fake command-line option.
 */
static int __init numa_emulation(unsigned long start_pfn,
                                 unsigned long last_pfn, int acpi, int amd)
{
        u64 addr = start_pfn << PAGE_SHIFT;
        u64 max_addr = last_pfn << PAGE_SHIFT;
        int num_nodes;
        int i;

        /*
         * If the numa=fake command-line contains a 'M' or 'G', it represents
         * the fixed node size.  Otherwise, if it is just a single number N,
         * split the system RAM into N fake nodes.
         */
        if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) {
                u64 size;

                size = memparse(cmdline, &cmdline);
                num_nodes = split_nodes_size_interleave(addr, max_addr, size);
        } else {
                unsigned long n;

                n = simple_strtoul(cmdline, NULL, 0);
                num_nodes = split_nodes_interleave(addr, max_addr, n);
        }

        if (num_nodes < 0)
                return num_nodes;
        memnode_shift = compute_hash_shift(nodes, num_nodes, NULL);
        if (memnode_shift < 0) {
                memnode_shift = 0;
                printk(KERN_ERR "No NUMA hash function found.  NUMA emulation "
                       "disabled.\n");
                return -1;
        }

        /*
         * We need to vacate all active ranges that may have been registered
         * for the e820 memory map.
         */
        remove_all_active_ranges();
        for_each_node_mask(i, node_possible_map) {
                memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
                                                nodes[i].end >> PAGE_SHIFT);
                setup_node_bootmem(i, nodes[i].start, nodes[i].end);
        }
        setup_physnodes(addr, max_addr, acpi, amd);
        fake_physnodes(acpi, amd, num_nodes);
        numa_init_array();
        return 0;
}
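/*
 * Usage note (illustrative, not in the original file): the parser above
 * handles both documented forms of the numa=fake option, e.g.
 *
 *      numa=fake=8       split system RAM into 8 interleaved fake nodes
 *      numa=fake=512M    carve out fake nodes of 512MB each
 *
 * A plain number is handled by split_nodes_interleave(); a size with an 'M'
 * or 'G' suffix goes through memparse() and split_nodes_size_interleave().
 */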
#endif /* CONFIG_NUMA_EMU */
void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
                         int acpi, int amd)
{
        int i;

        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);

#ifdef CONFIG_NUMA_EMU
        setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
                        acpi, amd);
        if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd))
                return;
        setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT,
                        acpi, amd);
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
#endif

#ifdef CONFIG_ACPI_NUMA
        if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
                                                  last_pfn << PAGE_SHIFT))
                return;
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
#endif

#ifdef CONFIG_AMD_NUMA
        if (!numa_off && amd && !amd_scan_nodes())
                return;
        nodes_clear(node_possible_map);
        nodes_clear(node_online_map);
#endif
        printk(KERN_INFO "%s\n",
               numa_off ? "NUMA turned off" : "No NUMA configuration found");

        printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
               start_pfn << PAGE_SHIFT,
               last_pfn << PAGE_SHIFT);
        /* setup dummy node covering all memory */
        memnode_shift = 63;
        memnodemap = memnode.embedded_map;
        memnodemap[0] = 0;
        node_set_online(0);
        node_set(0, node_possible_map);
        for (i = 0; i < nr_cpu_ids; i++)
                numa_set_node(i, 0);
        memblock_x86_register_active_regions(0, start_pfn, last_pfn);
        setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
}
unsigned long __init numa_free_all_bootmem(void)
{
        unsigned long pages = 0;
        int i;

        for_each_online_node(i)
                pages += free_all_bootmem_node(NODE_DATA(i));

        pages += free_all_memory_core_early(MAX_NUMNODES);

        return pages;
}
static __init int find_near_online_node(int node)
{
        int n, val;
        int min_val = INT_MAX;
        int best_node = -1;

        for_each_online_node(n) {
                val = node_distance(node, n);

                if (val < min_val) {
                        min_val = val;
                        best_node = n;
                }
        }

        return best_node;
}
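/*
 * Illustrative example (not in the original file): if apicid_to_node[]
 * points a CPU at an offline (e.g. memoryless) node 2 and the reported
 * distances from node 2 to the online nodes are {node 0: 20, node 1: 10},
 * the loop above picks node 1, which init_cpu_to_node() below then uses as
 * that CPU's home node.
 */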
/*
 * Setup early cpu_to_node.
 *
 * Populate cpu_to_node[] only if the x86_cpu_to_apicid[] and
 * apicid_to_node[] tables have valid entries for a CPU.
 * This means we skip cpu_to_node[] initialisation for the NUMA
 * emulation and fake-node cases (when running a kernel compiled
 * for NUMA on a non-NUMA box), which is OK as cpu_to_node[]
 * is already initialized in a round robin manner at numa_init_array,
 * prior to this call, and this initialization is good enough
 * for the fake NUMA cases.
 *
 * Called before the per_cpu areas are setup.
 */
void __init init_cpu_to_node(void)
{
        int cpu;
        u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);

        BUG_ON(cpu_to_apicid == NULL);

        for_each_possible_cpu(cpu) {
                int node;
                u16 apicid = cpu_to_apicid[cpu];

                if (apicid == BAD_APICID)
                        continue;
                node = apicid_to_node[apicid];
                if (node == NUMA_NO_NODE)
                        continue;
                if (!node_online(node))
                        node = find_near_online_node(node);
                numa_set_node(cpu, node);
        }
}
void __cpuinit numa_set_node(int cpu, int node)
{
        int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);

        /* early setting, no percpu area yet */
        if (cpu_to_node_map) {
                cpu_to_node_map[cpu] = node;
                return;
        }

#ifdef CONFIG_DEBUG_PER_CPU_MAPS
        if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
                printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
                dump_stack();
                return;
        }
#endif
        per_cpu(x86_cpu_to_node_map, cpu) = node;

        if (node != NUMA_NO_NODE)
                set_cpu_numa_node(cpu, node);
}
void __cpuinit numa_clear_node(int cpu)
{
        numa_set_node(cpu, NUMA_NO_NODE);
}
#ifndef CONFIG_DEBUG_PER_CPU_MAPS

#ifndef CONFIG_NUMA_EMU
void __cpuinit numa_add_cpu(int cpu)
{
        cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}

void __cpuinit numa_remove_cpu(int cpu)
{
        cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
}
#else
void __cpuinit numa_add_cpu(int cpu)
{
        unsigned long addr;
        u16 apicid;
        int physnid;
        int nid = NUMA_NO_NODE;

        apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
        if (apicid != BAD_APICID)
                nid = apicid_to_node[apicid];
        if (nid == NUMA_NO_NODE)
                nid = early_cpu_to_node(cpu);
        BUG_ON(nid == NUMA_NO_NODE || !node_online(nid));

        /*
         * Use the starting address of the emulated node to find which
         * physical node it is allocated on.
         */
        addr = node_start_pfn(nid) << PAGE_SHIFT;
        for (physnid = 0; physnid < MAX_NUMNODES; physnid++)
                if (addr >= physnodes[physnid].start &&
                    addr < physnodes[physnid].end)
                        break;

        /*
         * Map the cpu to each emulated node that is allocated on the
         * physical node of the cpu's apic id.
         */
        for_each_online_node(nid) {
                addr = node_start_pfn(nid) << PAGE_SHIFT;
                if (addr >= physnodes[physnid].start &&
                    addr < physnodes[physnid].end)
                        cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
        }
}

void __cpuinit numa_remove_cpu(int cpu)
{
        int i;

        for_each_online_node(i)
                cpumask_clear_cpu(cpu, node_to_cpumask_map[i]);
}
#endif /* !CONFIG_NUMA_EMU */

#else /* CONFIG_DEBUG_PER_CPU_MAPS */
static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable)
{
        int node = early_cpu_to_node(cpu);
        struct cpumask *mask;
        char buf[64];

        mask = node_to_cpumask_map[node];
        if (!mask) {
                pr_err("node_to_cpumask_map[%i] NULL\n", node);
                dump_stack();
                return NULL;
        }

        cpulist_scnprintf(buf, sizeof(buf), mask);
        printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
               enable ? "numa_add_cpu" : "numa_remove_cpu",
               cpu, node, buf);
        return mask;
}

/*
 * --------- debug versions of the numa functions ---------
 */
#ifndef CONFIG_NUMA_EMU
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
        struct cpumask *mask;

        mask = debug_cpumask_set_cpu(cpu, enable);
        if (!mask)
                return;

        if (enable)
                cpumask_set_cpu(cpu, mask);
        else
                cpumask_clear_cpu(cpu, mask);
}
#else
static void __cpuinit numa_set_cpumask(int cpu, int enable)
{
        int node = early_cpu_to_node(cpu);
        struct cpumask *mask;
        int i;

        for_each_online_node(i) {
                unsigned long addr;

                addr = node_start_pfn(i) << PAGE_SHIFT;
                if (addr < physnodes[node].start ||
                    addr >= physnodes[node].end)
                        continue;
                mask = debug_cpumask_set_cpu(cpu, enable);
                if (!mask)
                        return;

                if (enable)
                        cpumask_set_cpu(cpu, mask);
                else
                        cpumask_clear_cpu(cpu, mask);
        }
}
#endif /* CONFIG_NUMA_EMU */
void __cpuinit numa_add_cpu(int cpu)
{
        numa_set_cpumask(cpu, 1);
}

void __cpuinit numa_remove_cpu(int cpu)
{
        numa_set_cpumask(cpu, 0);
}
int __cpu_to_node(int cpu)
{
        if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
                printk(KERN_WARNING
                       "cpu_to_node(%d): usage too early!\n", cpu);
                dump_stack();
                return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
        }
        return per_cpu(x86_cpu_to_node_map, cpu);
}
EXPORT_SYMBOL(__cpu_to_node);
/*
 * Same function as cpu_to_node() but used if called before the
 * per_cpu areas are setup.
 */
int early_cpu_to_node(int cpu)
{
        if (early_per_cpu_ptr(x86_cpu_to_node_map))
                return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];

        if (!cpu_possible(cpu)) {
                printk(KERN_WARNING
                       "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
                dump_stack();
                return NUMA_NO_NODE;
        }
        return per_cpu(x86_cpu_to_node_map, cpu);
}
/*
 * --------- end of debug versions of the numa functions ---------
 */

#endif /* CONFIG_DEBUG_PER_CPU_MAPS */