2 * Copyright (c) 2014 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
26 #include <sys/types.h>
28 #endif /* __linux__ */
31 #include "openvswitch/hmap.h"
32 #include "openvswitch/list.h"
33 #include "ovs-thread.h"
34 #include "openvswitch/vlog.h"
/* Registers this file as the "ovs_numa" logging module. */
VLOG_DEFINE_THIS_MODULE(ovs_numa);
/* This module stores the affinity information of numa nodes and cpu cores.
 * It also provides functions to bookkeep the pinning of threads on cpu cores.
 *
 * It is assumed that the numa node ids and cpu core ids all start from 0 and
 * range continuously.  So, for example, if 'ovs_numa_get_n_cores()' returns N,
 * the user can assume core ids from 0 to N-1 are all valid and there is a
 * 'struct cpu_core' for each id.
 *
 * NOTE, this module should only be used by the main thread.
 *
 * NOTE, the assumption above will fail when cpu hotplug is used.  In that
 * case ovs-numa will not function correctly.  For now, add a TODO entry
 * for addressing it in the future.
 *
 * TODO: Fix ovs-numa when cpu hotplug is used.
 */
/* Exclusive upper bound on the numa node ids that discovery probes or
 * accepts. */
#define MAX_NUMA_NODES 128
63 struct hmap_node hmap_node
; /* In the 'all_numa_nodes'. */
64 struct ovs_list cores
; /* List of cpu cores on the numa node. */
65 int numa_id
; /* numa node id. */
68 /* Cpu core on a numa node. */
70 struct hmap_node hmap_node
;/* In the 'all_cpu_cores'. */
71 struct ovs_list list_node
; /* In 'numa_node->cores' list. */
72 struct numa_node
*numa
; /* numa node containing the core. */
73 unsigned core_id
; /* Core id. */
76 /* Contains all 'struct numa_node's. */
77 static struct hmap all_numa_nodes
= HMAP_INITIALIZER(&all_numa_nodes
);
78 /* Contains all 'struct cpu_core's. */
79 static struct hmap all_cpu_cores
= HMAP_INITIALIZER(&all_cpu_cores
);
80 /* True if numa node and core info are correctly extracted. */
81 static bool found_numa_and_core
;
82 /* True if the module was initialized with dummy options. In this case, the
83 * module must not interact with the actual cpus/nodes in the system. */
84 static bool dummy_numa
= false;
85 /* If 'dummy_numa' is true, contains a copy of the dummy numa configuration
87 static char *dummy_config
;
89 static struct numa_node
*get_numa_by_numa_id(int numa_id
);
#ifdef __linux__
/* Returns true if every character of 'str' is a decimal digit (which is
 * trivially the case for an empty string).  Returns false otherwise. */
static bool
contain_all_digits(const char *str)
{
    const char *p = str;

    while (*p >= '0' && *p <= '9') {
        p++;
    }

    /* Only an all-digit string is consumed up to its terminator. */
    return *p == '\0';
}
#endif /* __linux__ */
100 static struct numa_node
*
101 insert_new_numa_node(int numa_id
)
103 struct numa_node
*n
= xzalloc(sizeof *n
);
105 hmap_insert(&all_numa_nodes
, &n
->hmap_node
, hash_int(numa_id
, 0));
106 ovs_list_init(&n
->cores
);
107 n
->numa_id
= numa_id
;
112 static struct cpu_core
*
113 insert_new_cpu_core(struct numa_node
*n
, unsigned core_id
)
115 struct cpu_core
*c
= xzalloc(sizeof *c
);
117 hmap_insert(&all_cpu_cores
, &c
->hmap_node
, hash_int(core_id
, 0));
118 ovs_list_insert(&n
->cores
, &c
->list_node
);
119 c
->core_id
= core_id
;
125 /* Has the same effect as discover_numa_and_core(), but instead of
126 * reading sysfs entries, extracts the info from the global variable
127 * 'dummy_config', which is set with ovs_numa_set_dummy().
129 * 'dummy_config' lists the numa_ids of each CPU separated by a comma, e.g.
130 * - "0,0,0,0": four cores on numa socket 0.
131 * - "0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1": 16 cores on two numa sockets.
132 * - "0,0,0,0,1,1,1,1": 8 cores on two numa sockets.
134 * The different numa ids must be consecutives or the function will abort. */
136 discover_numa_and_core_dummy(void)
138 char *conf
= xstrdup(dummy_config
);
139 char *id
, *saveptr
= NULL
;
141 long max_numa_id
= 0;
143 for (id
= strtok_r(conf
, ",", &saveptr
); id
;
144 id
= strtok_r(NULL
, ",", &saveptr
)) {
145 struct hmap_node
*hnode
;
149 numa_id
= strtol(id
, NULL
, 10);
150 if (numa_id
< 0 || numa_id
>= MAX_NUMA_NODES
) {
151 VLOG_WARN("Invalid numa node %ld", numa_id
);
155 max_numa_id
= MAX(max_numa_id
, numa_id
);
157 hnode
= hmap_first_with_hash(&all_numa_nodes
, hash_int(numa_id
, 0));
160 n
= CONTAINER_OF(hnode
, struct numa_node
, hmap_node
);
162 n
= insert_new_numa_node(numa_id
);
165 insert_new_cpu_core(n
, i
);
172 if (max_numa_id
+ 1 != hmap_count(&all_numa_nodes
)) {
173 ovs_fatal(0, "dummy numa contains non consecutive numa ids");
177 /* Discovers all numa nodes and the corresponding cpu cores.
178 * Constructs the 'struct numa_node' and 'struct cpu_core'. */
180 discover_numa_and_core(void)
185 bool numa_supported
= true;
187 /* Check if NUMA supported on this system. */
188 dir
= opendir("/sys/devices/system/node");
190 if (!dir
&& errno
== ENOENT
) {
191 numa_supported
= false;
197 for (i
= 0; i
< MAX_NUMA_NODES
; i
++) {
200 if (numa_supported
) {
201 /* Constructs the path to node /sys/devices/system/nodeX. */
202 path
= xasprintf("/sys/devices/system/node/node%d", i
);
204 path
= xasprintf("/sys/devices/system/cpu/");
209 /* Creates 'struct numa_node' if the 'dir' is non-null. */
212 struct dirent
*subdir
;
214 n
= insert_new_numa_node(i
);
216 while ((subdir
= readdir(dir
)) != NULL
) {
217 if (!strncmp(subdir
->d_name
, "cpu", 3)
218 && contain_all_digits(subdir
->d_name
+ 3)) {
221 core_id
= strtoul(subdir
->d_name
+ 3, NULL
, 10);
222 insert_new_cpu_core(n
, core_id
);
226 } else if (errno
!= ENOENT
) {
227 VLOG_WARN("opendir(%s) failed (%s)", path
,
228 ovs_strerror(errno
));
232 if (!dir
|| !numa_supported
) {
236 #endif /* __linux__ */
239 /* Gets 'struct cpu_core' by 'core_id'. */
240 static struct cpu_core
*
241 get_core_by_core_id(unsigned core_id
)
243 struct cpu_core
*core
;
245 HMAP_FOR_EACH_WITH_HASH (core
, hmap_node
, hash_int(core_id
, 0),
247 if (core
->core_id
== core_id
) {
255 /* Gets 'struct numa_node' by 'numa_id'. */
256 static struct numa_node
*
257 get_numa_by_numa_id(int numa_id
)
259 struct numa_node
*numa
;
261 HMAP_FOR_EACH_WITH_HASH (numa
, hmap_node
, hash_int(numa_id
, 0),
263 if (numa
->numa_id
== numa_id
) {
272 /* Initializes the numa module. */
276 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
278 if (ovsthread_once_start(&once
)) {
279 const struct numa_node
*n
;
282 discover_numa_and_core_dummy();
284 discover_numa_and_core();
287 HMAP_FOR_EACH(n
, hmap_node
, &all_numa_nodes
) {
288 VLOG_INFO("Discovered %"PRIuSIZE
" CPU cores on NUMA node %d",
289 ovs_list_size(&n
->cores
), n
->numa_id
);
292 VLOG_INFO("Discovered %"PRIuSIZE
" NUMA nodes and %"PRIuSIZE
" CPU cores",
293 hmap_count(&all_numa_nodes
), hmap_count(&all_cpu_cores
));
295 if (hmap_count(&all_numa_nodes
) && hmap_count(&all_cpu_cores
)) {
296 found_numa_and_core
= true;
299 ovsthread_once_done(&once
);
303 /* Extracts the numa node and core info from the 'config'. This is useful for
304 * testing purposes. The function must be called once, before ovs_numa_init().
306 * The format of 'config' is explained in the comment above
307 * discover_numa_and_core_dummy().*/
309 ovs_numa_set_dummy(const char *config
)
314 dummy_config
= xstrdup(config
);
318 ovs_numa_numa_id_is_valid(int numa_id
)
320 return found_numa_and_core
&& numa_id
< ovs_numa_get_n_numas();
324 ovs_numa_core_id_is_valid(unsigned core_id
)
326 return found_numa_and_core
&& core_id
< ovs_numa_get_n_cores();
329 /* Returns the number of numa nodes. */
331 ovs_numa_get_n_numas(void)
333 return found_numa_and_core
? hmap_count(&all_numa_nodes
)
337 /* Returns the number of cpu cores. */
339 ovs_numa_get_n_cores(void)
341 return found_numa_and_core
? hmap_count(&all_cpu_cores
)
345 /* Given 'core_id', returns the corresponding numa node id. Returns
346 * OVS_NUMA_UNSPEC if 'core_id' is invalid. */
348 ovs_numa_get_numa_id(unsigned core_id
)
350 struct cpu_core
*core
= get_core_by_core_id(core_id
);
353 return core
->numa
->numa_id
;
356 return OVS_NUMA_UNSPEC
;
359 /* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC
360 * if 'numa_id' is invalid. */
362 ovs_numa_get_n_cores_on_numa(int numa_id
)
364 struct numa_node
*numa
= get_numa_by_numa_id(numa_id
);
367 return ovs_list_size(&numa
->cores
);
370 return OVS_CORE_UNSPEC
;
373 static struct ovs_numa_dump
*
374 ovs_numa_dump_create(void)
376 struct ovs_numa_dump
*dump
= xmalloc(sizeof *dump
);
378 hmap_init(&dump
->cores
);
379 hmap_init(&dump
->numas
);
385 ovs_numa_dump_add(struct ovs_numa_dump
*dump
, int numa_id
, int core_id
)
387 struct ovs_numa_info_core
*c
= xzalloc(sizeof *c
);
388 struct ovs_numa_info_numa
*n
;
390 c
->numa_id
= numa_id
;
391 c
->core_id
= core_id
;
392 hmap_insert(&dump
->cores
, &c
->hmap_node
, hash_2words(numa_id
, core_id
));
394 HMAP_FOR_EACH_WITH_HASH (n
, hmap_node
, hash_int(numa_id
, 0),
396 if (n
->numa_id
== numa_id
) {
402 n
= xzalloc(sizeof *n
);
403 n
->numa_id
= numa_id
;
405 hmap_insert(&dump
->numas
, &n
->hmap_node
, hash_int(numa_id
, 0));
408 /* Given the 'numa_id', returns dump of all cores on the numa node. */
409 struct ovs_numa_dump
*
410 ovs_numa_dump_cores_on_numa(int numa_id
)
412 struct ovs_numa_dump
*dump
= ovs_numa_dump_create();
413 struct numa_node
*numa
= get_numa_by_numa_id(numa_id
);
416 struct cpu_core
*core
;
418 LIST_FOR_EACH (core
, list_node
, &numa
->cores
) {
419 ovs_numa_dump_add(dump
, numa
->numa_id
, core
->core_id
);
426 struct ovs_numa_dump
*
427 ovs_numa_dump_cores_with_cmask(const char *cmask
)
429 struct ovs_numa_dump
*dump
= ovs_numa_dump_create();
433 /* Ignore leading 0x. */
435 if (!strncmp(cmask
, "0x", 2) || !strncmp(cmask
, "0X", 2)) {
439 for (int i
= strlen(cmask
) - 1; i
>= end_idx
; i
--) {
443 bin
= hexit_value(hex
);
445 VLOG_WARN("Invalid cpu mask: %c", cmask
[i
]);
449 for (int j
= 0; j
< 4; j
++) {
450 if ((bin
>> j
) & 0x1) {
451 struct cpu_core
*core
= get_core_by_core_id(core_id
);
454 ovs_numa_dump_add(dump
,
467 struct ovs_numa_dump
*
468 ovs_numa_dump_n_cores_per_numa(int cores_per_numa
)
470 struct ovs_numa_dump
*dump
= ovs_numa_dump_create();
471 const struct numa_node
*n
;
473 HMAP_FOR_EACH (n
, hmap_node
, &all_numa_nodes
) {
474 const struct cpu_core
*core
;
477 LIST_FOR_EACH (core
, list_node
, &n
->cores
) {
478 if (i
++ >= cores_per_numa
) {
482 ovs_numa_dump_add(dump
, core
->numa
->numa_id
, core
->core_id
);
490 ovs_numa_dump_contains_core(const struct ovs_numa_dump
*dump
,
491 int numa_id
, unsigned core_id
)
493 struct ovs_numa_info_core
*core
;
495 HMAP_FOR_EACH_WITH_HASH (core
, hmap_node
, hash_2words(numa_id
, core_id
),
497 if (core
->core_id
== core_id
&& core
->numa_id
== numa_id
) {
506 ovs_numa_dump_count(const struct ovs_numa_dump
*dump
)
508 return hmap_count(&dump
->cores
);
512 ovs_numa_dump_destroy(struct ovs_numa_dump
*dump
)
514 struct ovs_numa_info_core
*c
;
515 struct ovs_numa_info_numa
*n
;
521 HMAP_FOR_EACH_POP (c
, hmap_node
, &dump
->cores
) {
525 HMAP_FOR_EACH_POP (n
, hmap_node
, &dump
->numas
) {
529 hmap_destroy(&dump
->cores
);
530 hmap_destroy(&dump
->numas
);
535 struct ovs_numa_dump
*
536 ovs_numa_thread_getaffinity_dump(void)
546 struct ovs_numa_dump
*dump
;
547 const struct numa_node
*n
;
552 err
= pthread_getaffinity_np(pthread_self(), sizeof cpuset
, &cpuset
);
554 VLOG_ERR("Thread getaffinity error: %s", ovs_strerror(err
));
558 dump
= ovs_numa_dump_create();
560 HMAP_FOR_EACH (n
, hmap_node
, &all_numa_nodes
) {
561 const struct cpu_core
*core
;
563 LIST_FOR_EACH (core
, list_node
, &n
->cores
) {
564 if (CPU_ISSET(core
->core_id
, &cpuset
)) {
565 ovs_numa_dump_add(dump
, core
->numa
->numa_id
, core
->core_id
);
570 if (!ovs_numa_dump_count(dump
)) {
571 ovs_numa_dump_destroy(dump
);
575 #endif /* __linux__ */
579 ovs_numa_thread_setaffinity_dump(const struct ovs_numa_dump
*dump
)
581 if (!dump
|| dummy_numa
) {
587 const struct ovs_numa_info_core
*core
;
592 FOR_EACH_CORE_ON_DUMP (core
, dump
) {
593 CPU_SET(core
->core_id
, &cpuset
);
595 err
= pthread_setaffinity_np(pthread_self(), sizeof cpuset
, &cpuset
);
597 VLOG_ERR("Thread setaffinity error: %s", ovs_strerror(err
));
602 #else /* !__linux__ */
604 #endif /* __linux__ */
607 int ovs_numa_thread_setaffinity_core(unsigned core_id
)
609 const struct cpu_core
*core
= get_core_by_core_id(core_id
);
610 struct ovs_numa_dump
*affinity
= ovs_numa_dump_create();
614 ovs_numa_dump_add(affinity
, core
->numa
->numa_id
, core
->core_id
);
615 ret
= ovs_numa_thread_setaffinity_dump(affinity
);
618 ovs_numa_dump_destroy(affinity
);