/*
 * Copyright (c) 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* On non-Linux, these functions are defined inline in ovs-numa.h. */
28 #include <sys/types.h>
33 #include "openvswitch/list.h"
34 #include "ovs-thread.h"
35 #include "openvswitch/vlog.h"
/* Registers this file's log messages under the "ovs_numa" vlog module. */
VLOG_DEFINE_THIS_MODULE(ovs_numa);
/*
 * This module stores the affinity information of numa nodes and cpu cores.
 * It also provides functions to bookkeep the pin of threads on cpu cores.
 *
 * It is assumed that the numa node ids and cpu core ids all start from 0 and
 * range continuously.  So, for example, if 'ovs_numa_get_n_cores()' returns N,
 * user can assume core ids from 0 to N-1 are all valid and there is a
 * 'struct cpu_core' for each id.
 *
 * NOTE, this module should only be used by the main thread.
 *
 * NOTE, the assumption above will fail when cpu hotplug is used.  In that
 * case ovs-numa will not function correctly.  For now, add a TODO entry
 * for addressing it in the future.
 *
 * TODO: Fix ovs-numa when cpu hotplug is used.
 */
/* Upper bound on the number of numa node ids probed during discovery. */
#define MAX_NUMA_NODES 128
63 struct hmap_node hmap_node
; /* In the 'all_numa_nodes'. */
64 struct ovs_list cores
; /* List of cpu cores on the numa node. */
65 int numa_id
; /* numa node id. */
68 /* Cpu core on a numa node. */
70 struct hmap_node hmap_node
;/* In the 'all_cpu_cores'. */
71 struct ovs_list list_node
; /* In 'numa_node->cores' list. */
72 struct numa_node
*numa
; /* numa node containing the core. */
73 unsigned core_id
; /* Core id. */
74 bool available
; /* If the core can be pinned. */
75 bool pinned
; /* If a thread has been pinned to the core. */
78 /* Contains all 'struct numa_node's. */
79 static struct hmap all_numa_nodes
= HMAP_INITIALIZER(&all_numa_nodes
);
80 /* Contains all 'struct cpu_core's. */
81 static struct hmap all_cpu_cores
= HMAP_INITIALIZER(&all_cpu_cores
);
82 /* True if numa node and core info are correctly extracted. */
83 static bool found_numa_and_core
;
/* Returns true if every character of 'str' is a decimal digit, false
 * otherwise.  An empty string contains no non-digit character, so it also
 * yields true; callers that need at least one digit must check for that
 * themselves. */
static bool
contain_all_digits(const char *str)
{
    /* strspn() gives the length of the leading run of digits; the string is
     * all digits exactly when that run ends at the terminator. */
    return str[strspn(str, "0123456789")] == '\0';
}
92 /* Discovers all numa nodes and the corresponding cpu cores.
93 * Constructs the 'struct numa_node' and 'struct cpu_core'. */
95 discover_numa_and_core(void)
100 bool numa_supported
= true;
102 /* Check if NUMA supported on this system. */
103 dir
= opendir("/sys/devices/system/node");
105 if (!dir
&& errno
== ENOENT
) {
106 numa_supported
= false;
112 for (i
= 0; i
< MAX_NUMA_NODES
; i
++) {
115 if (numa_supported
) {
116 /* Constructs the path to node /sys/devices/system/nodeX. */
117 path
= xasprintf("/sys/devices/system/node/node%d", i
);
119 path
= xasprintf("/sys/devices/system/cpu/");
124 /* Creates 'struct numa_node' if the 'dir' is non-null. */
126 struct numa_node
*n
= xzalloc(sizeof *n
);
127 struct dirent
*subdir
;
129 hmap_insert(&all_numa_nodes
, &n
->hmap_node
, hash_int(i
, 0));
130 list_init(&n
->cores
);
133 while ((subdir
= readdir(dir
)) != NULL
) {
134 if (!strncmp(subdir
->d_name
, "cpu", 3)
135 && contain_all_digits(subdir
->d_name
+ 3)){
136 struct cpu_core
*c
= xzalloc(sizeof *c
);
139 core_id
= strtoul(subdir
->d_name
+ 3, NULL
, 10);
140 hmap_insert(&all_cpu_cores
, &c
->hmap_node
,
141 hash_int(core_id
, 0));
142 list_insert(&n
->cores
, &c
->list_node
);
143 c
->core_id
= core_id
;
149 VLOG_INFO("Discovered %"PRIuSIZE
" CPU cores on NUMA node %d",
150 list_size(&n
->cores
), n
->numa_id
);
152 } else if (errno
!= ENOENT
) {
153 VLOG_WARN("opendir(%s) failed (%s)", path
,
154 ovs_strerror(errno
));
158 if (!dir
|| !numa_supported
) {
163 VLOG_INFO("Discovered %"PRIuSIZE
" NUMA nodes and %d CPU cores",
164 hmap_count(&all_numa_nodes
), n_cpus
);
165 if (hmap_count(&all_numa_nodes
) && hmap_count(&all_cpu_cores
)) {
166 found_numa_and_core
= true;
170 /* Gets 'struct cpu_core' by 'core_id'. */
171 static struct cpu_core
*
172 get_core_by_core_id(unsigned core_id
)
174 struct cpu_core
*core
= NULL
;
176 if (ovs_numa_core_id_is_valid(core_id
)) {
177 core
= CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores
,
178 hash_int(core_id
, 0)),
179 struct cpu_core
, hmap_node
);
185 /* Gets 'struct numa_node' by 'numa_id'. */
186 static struct numa_node
*
187 get_numa_by_numa_id(int numa_id
)
189 struct numa_node
*numa
= NULL
;
191 if (ovs_numa_numa_id_is_valid(numa_id
)) {
192 numa
= CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes
,
193 hash_int(numa_id
, 0)),
194 struct numa_node
, hmap_node
);
201 /* Extracts the numa node and core info from the 'sysfs'. */
205 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
207 if (ovsthread_once_start(&once
)) {
208 discover_numa_and_core();
209 ovsthread_once_done(&once
);
214 ovs_numa_numa_id_is_valid(int numa_id
)
216 return found_numa_and_core
&& numa_id
< ovs_numa_get_n_numas();
220 ovs_numa_core_id_is_valid(unsigned core_id
)
222 return found_numa_and_core
&& core_id
< ovs_numa_get_n_cores();
226 ovs_numa_core_is_pinned(unsigned core_id
)
228 struct cpu_core
*core
= get_core_by_core_id(core_id
);
237 /* Returns the number of numa nodes. */
239 ovs_numa_get_n_numas(void)
241 return found_numa_and_core
? hmap_count(&all_numa_nodes
)
245 /* Returns the number of cpu cores. */
247 ovs_numa_get_n_cores(void)
249 return found_numa_and_core
? hmap_count(&all_cpu_cores
)
253 /* Given 'core_id', returns the corresponding numa node id. Returns
254 * OVS_NUMA_UNSPEC if 'core_id' is invalid. */
256 ovs_numa_get_numa_id(unsigned core_id
)
258 struct cpu_core
*core
= get_core_by_core_id(core_id
);
261 return core
->numa
->numa_id
;
264 return OVS_NUMA_UNSPEC
;
267 /* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC
268 * if 'numa_id' is invalid. */
270 ovs_numa_get_n_cores_on_numa(int numa_id
)
272 struct numa_node
*numa
= get_numa_by_numa_id(numa_id
);
275 return list_size(&numa
->cores
);
278 return OVS_CORE_UNSPEC
;
281 /* Returns the number of cpu cores that are available and unpinned
282 * on numa node. Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */
284 ovs_numa_get_n_unpinned_cores_on_numa(int numa_id
)
286 struct numa_node
*numa
= get_numa_by_numa_id(numa_id
);
289 struct cpu_core
*core
;
292 LIST_FOR_EACH(core
, list_node
, &numa
->cores
) {
293 if (core
->available
&& !core
->pinned
) {
300 return OVS_CORE_UNSPEC
;
303 /* Given 'core_id', tries to pin that core. Returns true, if succeeds.
304 * False, if the core has already been pinned, or if it is invalid or
307 ovs_numa_try_pin_core_specific(unsigned core_id
)
309 struct cpu_core
*core
= get_core_by_core_id(core_id
);
312 if (core
->available
&& !core
->pinned
) {
321 /* Searches through all cores for an unpinned and available core. Returns
322 * the 'core_id' if found and sets the 'core->pinned' to true. Otherwise,
323 * returns OVS_CORE_UNSPEC. */
325 ovs_numa_get_unpinned_core_any(void)
327 struct cpu_core
*core
;
329 HMAP_FOR_EACH(core
, hmap_node
, &all_cpu_cores
) {
330 if (core
->available
&& !core
->pinned
) {
332 return core
->core_id
;
336 return OVS_CORE_UNSPEC
;
339 /* Searches through all cores on numa node with 'numa_id' for an
340 * unpinned and available core. Returns the core_id if found and
341 * sets the 'core->pinned' to true. Otherwise, returns OVS_CORE_UNSPEC. */
343 ovs_numa_get_unpinned_core_on_numa(int numa_id
)
345 struct numa_node
*numa
= get_numa_by_numa_id(numa_id
);
348 struct cpu_core
*core
;
350 LIST_FOR_EACH(core
, list_node
, &numa
->cores
) {
351 if (core
->available
&& !core
->pinned
) {
353 return core
->core_id
;
358 return OVS_CORE_UNSPEC
;
361 /* Unpins the core with 'core_id'. */
363 ovs_numa_unpin_core(unsigned core_id
)
365 struct cpu_core
*core
= get_core_by_core_id(core_id
);
368 core
->pinned
= false;
372 /* Given the 'numa_id', returns dump of all cores on the numa node. */
373 struct ovs_numa_dump
*
374 ovs_numa_dump_cores_on_numa(int numa_id
)
376 struct ovs_numa_dump
*dump
= NULL
;
377 struct numa_node
*numa
= get_numa_by_numa_id(numa_id
);
380 struct cpu_core
*core
;
382 dump
= xmalloc(sizeof *dump
);
383 list_init(&dump
->dump
);
384 LIST_FOR_EACH(core
, list_node
, &numa
->cores
) {
385 struct ovs_numa_info
*info
= xmalloc(sizeof *info
);
387 info
->numa_id
= numa
->numa_id
;
388 info
->core_id
= core
->core_id
;
389 list_insert(&dump
->dump
, &info
->list_node
);
397 ovs_numa_dump_destroy(struct ovs_numa_dump
*dump
)
399 struct ovs_numa_info
*iter
;
401 LIST_FOR_EACH_POP (iter
, list_node
, &dump
->dump
) {
408 /* Reads the cpu mask configuration from 'cmask' and sets the
409 * 'available' of corresponding cores. For unspecified cores,
410 * sets 'available' to false. */
412 ovs_numa_set_cpu_mask(const char *cmask
)
417 if (!found_numa_and_core
) {
421 /* If no mask specified, resets the 'available' to true for all cores. */
423 struct cpu_core
*core
;
425 HMAP_FOR_EACH(core
, hmap_node
, &all_cpu_cores
) {
426 core
->available
= true;
432 for (i
= strlen(cmask
) - 1; i
>= 0; i
--) {
433 char hex
= toupper(cmask
[i
]);
436 if (hex
>= '0' && hex
<= '9') {
438 } else if (hex
>= 'A' && hex
<= 'F') {
439 bin
= hex
- 'A' + 10;
442 VLOG_WARN("Invalid cpu mask: %c", cmask
[i
]);
445 for (j
= 0; j
< 4; j
++) {
446 struct cpu_core
*core
;
448 core
= CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores
,
449 hash_int(core_id
++, 0)),
450 struct cpu_core
, hmap_node
);
451 core
->available
= (bin
>> j
) & 0x1;
453 if (core_id
>= hmap_count(&all_cpu_cores
)) {
459 /* For unspecified cores, sets 'available' to false. */
460 while (core_id
< hmap_count(&all_cpu_cores
)) {
461 struct cpu_core
*core
;
463 core
= CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores
,
464 hash_int(core_id
++, 0)),
465 struct cpu_core
, hmap_node
);
466 core
->available
= false;
470 #endif /* __linux__ */