]> git.proxmox.com Git - mirror_ovs.git/blame - lib/ovs-numa.c
stopwatch: Remove tabs from output.
[mirror_ovs.git] / lib / ovs-numa.c
CommitLineData
7c5a3bbf
AW
1/*
2 * Copyright (c) 2014 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
7c5a3bbf
AW
17#include <config.h>
18#include "ovs-numa.h"
19
20#include <ctype.h>
7c5a3bbf 21#include <errno.h>
93ce5762
DDP
22#ifdef __linux__
23#include <dirent.h>
7c5a3bbf
AW
24#include <stddef.h>
25#include <string.h>
26#include <sys/types.h>
27#include <unistd.h>
93ce5762 28#endif /* __linux__ */
7c5a3bbf
AW
29
30#include "hash.h"
ee89ea7b 31#include "openvswitch/hmap.h"
b19bab5b 32#include "openvswitch/list.h"
7c5a3bbf 33#include "ovs-thread.h"
e6211adc 34#include "openvswitch/vlog.h"
dbedeb9d 35#include "util.h"
7c5a3bbf
AW
36
37VLOG_DEFINE_THIS_MODULE(ovs_numa);
38
34185750
AW
/* ovs-numa module
 * ===============
 *
 * This module stores the affinity information of numa nodes and cpu cores.
 * It also provides functions to keep track of the pinning of threads on
 * cpu cores.
 *
 * It is assumed that the numa node ids and cpu core ids all start from 0 and
 * range continuously.  So, for example, if 'ovs_numa_get_n_cores()' returns
 * N, the user can assume core ids from 0 to N-1 are all valid and there is a
 * 'struct cpu_core' for each id.
 *
 * NOTE, this module should only be used by the main thread.
 *
 * NOTE, the assumption above will fail when cpu hotplug is used.  In that
 * case ovs-numa will not function correctly.  For now, add a TODO entry
 * for addressing it in the future.
 *
 * TODO: Fix ovs-numa when cpu hotplug is used.
 */
58
012c0a04 59#define MAX_NUMA_NODES 128
7c5a3bbf 60
012c0a04
AW
61/* numa node. */
62struct numa_node {
63 struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */
ca6ba700 64 struct ovs_list cores; /* List of cpu cores on the numa node. */
012c0a04 65 int numa_id; /* numa node id. */
7c5a3bbf
AW
66};
67
012c0a04 68/* Cpu core on a numa node. */
7c5a3bbf
AW
69struct cpu_core {
70 struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */
ca6ba700 71 struct ovs_list list_node; /* In 'numa_node->cores' list. */
012c0a04 72 struct numa_node *numa; /* numa node containing the core. */
bd5131ba 73 unsigned core_id; /* Core id. */
7c5a3bbf
AW
74};
75
012c0a04
AW
76/* Contains all 'struct numa_node's. */
77static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes);
7c5a3bbf
AW
78/* Contains all 'struct cpu_core's. */
79static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores);
012c0a04
AW
80/* True if numa node and core info are correctly extracted. */
81static bool found_numa_and_core;
b4e28b7f
DDP
82/* True if the module was initialized with dummy options. In this case, the
83 * module must not interact with the actual cpus/nodes in the system. */
84static bool dummy_numa = false;
85/* If 'dummy_numa' is true, contains a copy of the dummy numa configuration
86 * parameter */
87static char *dummy_config;
88
89static struct numa_node *get_numa_by_numa_id(int numa_id);
7c5a3bbf 90
93ce5762 91#ifdef __linux__
7c5a3bbf
AW
/* Returns true if every character of 'str' is a decimal digit ('0'..'9'),
 * which includes the case where 'str' is empty.  Returns false as soon as
 * any other character is present. */
static bool
contain_all_digits(const char *str)
{
    const char *p = str;

    while (*p >= '0' && *p <= '9') {
        p++;
    }

    return *p == '\0';
}
93ce5762 98#endif /* __linux__ */
7c5a3bbf 99
b4e28b7f
DDP
100static struct numa_node *
101insert_new_numa_node(int numa_id)
102{
103 struct numa_node *n = xzalloc(sizeof *n);
104
105 hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(numa_id, 0));
106 ovs_list_init(&n->cores);
107 n->numa_id = numa_id;
108
109 return n;
110}
111
112static struct cpu_core *
113insert_new_cpu_core(struct numa_node *n, unsigned core_id)
114{
115 struct cpu_core *c = xzalloc(sizeof *c);
116
117 hmap_insert(&all_cpu_cores, &c->hmap_node, hash_int(core_id, 0));
118 ovs_list_insert(&n->cores, &c->list_node);
119 c->core_id = core_id;
120 c->numa = n;
b4e28b7f
DDP
121
122 return c;
123}
124
84b99a04
JP
125/* Has the same effect as discover_numa_and_core(), but instead of
126 * reading sysfs entries, extracts the info from the global variable
127 * 'dummy_config', which is set with ovs_numa_set_dummy().
b4e28b7f
DDP
128 *
129 * 'dummy_config' lists the numa_ids of each CPU separated by a comma, e.g.
130 * - "0,0,0,0": four cores on numa socket 0.
131 * - "0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1": 16 cores on two numa sockets.
132 * - "0,0,0,0,1,1,1,1": 8 cores on two numa sockets.
133 *
134 * The different numa ids must be consecutives or the function will abort. */
135static void
84b99a04 136discover_numa_and_core_dummy(void)
b4e28b7f
DDP
137{
138 char *conf = xstrdup(dummy_config);
139 char *id, *saveptr = NULL;
140 unsigned i = 0;
141 long max_numa_id = 0;
142
143 for (id = strtok_r(conf, ",", &saveptr); id;
144 id = strtok_r(NULL, ",", &saveptr)) {
145 struct hmap_node *hnode;
146 struct numa_node *n;
147 long numa_id;
148
149 numa_id = strtol(id, NULL, 10);
150 if (numa_id < 0 || numa_id >= MAX_NUMA_NODES) {
151 VLOG_WARN("Invalid numa node %ld", numa_id);
152 continue;
153 }
154
155 max_numa_id = MAX(max_numa_id, numa_id);
156
157 hnode = hmap_first_with_hash(&all_numa_nodes, hash_int(numa_id, 0));
158
159 if (hnode) {
160 n = CONTAINER_OF(hnode, struct numa_node, hmap_node);
161 } else {
162 n = insert_new_numa_node(numa_id);
163 }
164
165 insert_new_cpu_core(n, i);
166
167 i++;
168 }
169
170 free(conf);
171
172 if (max_numa_id + 1 != hmap_count(&all_numa_nodes)) {
173 ovs_fatal(0, "dummy numa contains non consecutive numa ids");
174 }
175}
176
012c0a04
AW
177/* Discovers all numa nodes and the corresponding cpu cores.
178 * Constructs the 'struct numa_node' and 'struct cpu_core'. */
7c5a3bbf 179static void
012c0a04 180discover_numa_and_core(void)
7c5a3bbf 181{
93ce5762 182#ifdef __linux__
7c5a3bbf 183 int i;
8ae587b9
IM
184 DIR *dir;
185 bool numa_supported = true;
186
187 /* Check if NUMA supported on this system. */
188 dir = opendir("/sys/devices/system/node");
189
190 if (!dir && errno == ENOENT) {
191 numa_supported = false;
192 }
193 if (dir) {
194 closedir(dir);
195 }
7c5a3bbf 196
012c0a04 197 for (i = 0; i < MAX_NUMA_NODES; i++) {
7c5a3bbf
AW
198 char* path;
199
8ae587b9
IM
200 if (numa_supported) {
201 /* Constructs the path to node /sys/devices/system/nodeX. */
202 path = xasprintf("/sys/devices/system/node/node%d", i);
203 } else {
204 path = xasprintf("/sys/devices/system/cpu/");
205 }
206
7c5a3bbf
AW
207 dir = opendir(path);
208
012c0a04 209 /* Creates 'struct numa_node' if the 'dir' is non-null. */
7c5a3bbf 210 if (dir) {
b4e28b7f 211 struct numa_node *n;
7c5a3bbf
AW
212 struct dirent *subdir;
213
b4e28b7f 214 n = insert_new_numa_node(i);
7c5a3bbf
AW
215
216 while ((subdir = readdir(dir)) != NULL) {
217 if (!strncmp(subdir->d_name, "cpu", 3)
b4e28b7f 218 && contain_all_digits(subdir->d_name + 3)) {
bd5131ba 219 unsigned core_id;
7c5a3bbf
AW
220
221 core_id = strtoul(subdir->d_name + 3, NULL, 10);
b4e28b7f 222 insert_new_cpu_core(n, core_id);
7c5a3bbf
AW
223 }
224 }
7c5a3bbf 225 closedir(dir);
8ae587b9
IM
226 } else if (errno != ENOENT) {
227 VLOG_WARN("opendir(%s) failed (%s)", path,
228 ovs_strerror(errno));
229 }
230
231 free(path);
232 if (!dir || !numa_supported) {
7c5a3bbf
AW
233 break;
234 }
235 }
93ce5762 236#endif /* __linux__ */
7c5a3bbf
AW
237}
238
9da2564e
AW
239/* Gets 'struct cpu_core' by 'core_id'. */
240static struct cpu_core*
bd5131ba 241get_core_by_core_id(unsigned core_id)
9da2564e 242{
0900ca8e 243 struct cpu_core *core;
9da2564e 244
0900ca8e
DDP
245 HMAP_FOR_EACH_WITH_HASH (core, hmap_node, hash_int(core_id, 0),
246 &all_cpu_cores) {
247 if (core->core_id == core_id) {
248 return core;
249 }
9da2564e
AW
250 }
251
0900ca8e 252 return NULL;
9da2564e
AW
253}
254
255/* Gets 'struct numa_node' by 'numa_id'. */
256static struct numa_node*
257get_numa_by_numa_id(int numa_id)
258{
0900ca8e 259 struct numa_node *numa;
9da2564e 260
0900ca8e
DDP
261 HMAP_FOR_EACH_WITH_HASH (numa, hmap_node, hash_int(numa_id, 0),
262 &all_numa_nodes) {
263 if (numa->numa_id == numa_id) {
264 return numa;
265 }
9da2564e
AW
266 }
267
0900ca8e 268 return NULL;
9da2564e
AW
269}
270
271\f
84b99a04
JP
272/* Initializes the numa module. */
273void
274ovs_numa_init(void)
7c5a3bbf
AW
275{
276 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
277
278 if (ovsthread_once_start(&once)) {
b4e28b7f
DDP
279 const struct numa_node *n;
280
84b99a04
JP
281 if (dummy_numa) {
282 discover_numa_and_core_dummy();
b4e28b7f 283 } else {
84b99a04 284 discover_numa_and_core();
b4e28b7f
DDP
285 }
286
287 HMAP_FOR_EACH(n, hmap_node, &all_numa_nodes) {
288 VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d",
289 ovs_list_size(&n->cores), n->numa_id);
290 }
291
292 VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %"PRIuSIZE" CPU cores",
293 hmap_count(&all_numa_nodes), hmap_count(&all_cpu_cores));
294
295 if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) {
296 found_numa_and_core = true;
297 }
298
7c5a3bbf 299 ovsthread_once_done(&once);
b4e28b7f
DDP
300 }
301}
302
303/* Extracts the numa node and core info from the 'config'. This is useful for
304 * testing purposes. The function must be called once, before ovs_numa_init().
305 *
306 * The format of 'config' is explained in the comment above
307 * discover_numa_and_core_dummy().*/
308void
309ovs_numa_set_dummy(const char *config)
310{
311 dummy_numa = true;
312 ovs_assert(config);
313 free(dummy_config);
314 dummy_config = xstrdup(config);
315}
316
7c5a3bbf 317bool
012c0a04 318ovs_numa_numa_id_is_valid(int numa_id)
7c5a3bbf 319{
421aa227 320 return found_numa_and_core && numa_id < ovs_numa_get_n_numas();
7c5a3bbf
AW
321}
322
323bool
bd5131ba 324ovs_numa_core_id_is_valid(unsigned core_id)
7c5a3bbf 325{
421aa227 326 return found_numa_and_core && core_id < ovs_numa_get_n_cores();
7c5a3bbf
AW
327}
328
012c0a04 329/* Returns the number of numa nodes. */
7c5a3bbf 330int
012c0a04 331ovs_numa_get_n_numas(void)
7c5a3bbf 332{
012c0a04
AW
333 return found_numa_and_core ? hmap_count(&all_numa_nodes)
334 : OVS_NUMA_UNSPEC;
7c5a3bbf
AW
335}
336
337/* Returns the number of cpu cores. */
338int
339ovs_numa_get_n_cores(void)
340{
012c0a04
AW
341 return found_numa_and_core ? hmap_count(&all_cpu_cores)
342 : OVS_CORE_UNSPEC;
7c5a3bbf
AW
343}
344
6b1105fb
AW
345/* Given 'core_id', returns the corresponding numa node id. Returns
346 * OVS_NUMA_UNSPEC if 'core_id' is invalid. */
347int
bd5131ba 348ovs_numa_get_numa_id(unsigned core_id)
6b1105fb 349{
9da2564e 350 struct cpu_core *core = get_core_by_core_id(core_id);
6b1105fb 351
9da2564e 352 if (core) {
6b1105fb
AW
353 return core->numa->numa_id;
354 }
9da2564e 355
6b1105fb
AW
356 return OVS_NUMA_UNSPEC;
357}
358
421aa227
AW
359/* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC
360 * if 'numa_id' is invalid. */
7c5a3bbf 361int
012c0a04 362ovs_numa_get_n_cores_on_numa(int numa_id)
7c5a3bbf 363{
9da2564e 364 struct numa_node *numa = get_numa_by_numa_id(numa_id);
7c5a3bbf 365
9da2564e 366 if (numa) {
417e7e66 367 return ovs_list_size(&numa->cores);
7c5a3bbf
AW
368 }
369
370 return OVS_CORE_UNSPEC;
371}
372
90f9f839
DDP
373static struct ovs_numa_dump *
374ovs_numa_dump_create(void)
375{
376 struct ovs_numa_dump *dump = xmalloc(sizeof *dump);
377
378 hmap_init(&dump->cores);
379 hmap_init(&dump->numas);
380
381 return dump;
382}
383
384static void
385ovs_numa_dump_add(struct ovs_numa_dump *dump, int numa_id, int core_id)
386{
387 struct ovs_numa_info_core *c = xzalloc(sizeof *c);
388 struct ovs_numa_info_numa *n;
389
390 c->numa_id = numa_id;
391 c->core_id = core_id;
392 hmap_insert(&dump->cores, &c->hmap_node, hash_2words(numa_id, core_id));
393
394 HMAP_FOR_EACH_WITH_HASH (n, hmap_node, hash_int(numa_id, 0),
395 &dump->numas) {
396 if (n->numa_id == numa_id) {
397 n->n_cores++;
398 return;
399 }
400 }
401
402 n = xzalloc(sizeof *n);
403 n->numa_id = numa_id;
404 n->n_cores = 1;
405 hmap_insert(&dump->numas, &n->hmap_node, hash_int(numa_id, 0));
406}
407
9da2564e
AW
408/* Given the 'numa_id', returns dump of all cores on the numa node. */
409struct ovs_numa_dump *
410ovs_numa_dump_cores_on_numa(int numa_id)
411{
90f9f839 412 struct ovs_numa_dump *dump = ovs_numa_dump_create();
9da2564e
AW
413 struct numa_node *numa = get_numa_by_numa_id(numa_id);
414
415 if (numa) {
416 struct cpu_core *core;
417
90f9f839
DDP
418 LIST_FOR_EACH (core, list_node, &numa->cores) {
419 ovs_numa_dump_add(dump, numa->numa_id, core->core_id);
9da2564e
AW
420 }
421 }
422
423 return dump;
424}
425
dbedeb9d
DDP
426struct ovs_numa_dump *
427ovs_numa_dump_cores_with_cmask(const char *cmask)
428{
90f9f839 429 struct ovs_numa_dump *dump = ovs_numa_dump_create();
dbedeb9d
DDP
430 int core_id = 0;
431 int end_idx;
432
dbedeb9d
DDP
433 /* Ignore leading 0x. */
434 end_idx = 0;
435 if (!strncmp(cmask, "0x", 2) || !strncmp(cmask, "0X", 2)) {
436 end_idx = 2;
437 }
438
439 for (int i = strlen(cmask) - 1; i >= end_idx; i--) {
440 char hex = cmask[i];
441 int bin;
442
443 bin = hexit_value(hex);
444 if (bin == -1) {
445 VLOG_WARN("Invalid cpu mask: %c", cmask[i]);
446 bin = 0;
447 }
448
449 for (int j = 0; j < 4; j++) {
450 if ((bin >> j) & 0x1) {
451 struct cpu_core *core = get_core_by_core_id(core_id);
452
453 if (core) {
90f9f839
DDP
454 ovs_numa_dump_add(dump,
455 core->numa->numa_id,
456 core->core_id);
dbedeb9d
DDP
457 }
458 }
459
460 core_id++;
461 }
462 }
463
464 return dump;
465}
466
467struct ovs_numa_dump *
468ovs_numa_dump_n_cores_per_numa(int cores_per_numa)
469{
90f9f839 470 struct ovs_numa_dump *dump = ovs_numa_dump_create();
dbedeb9d
DDP
471 const struct numa_node *n;
472
dbedeb9d
DDP
473 HMAP_FOR_EACH (n, hmap_node, &all_numa_nodes) {
474 const struct cpu_core *core;
475 int i = 0;
476
477 LIST_FOR_EACH (core, list_node, &n->cores) {
478 if (i++ >= cores_per_numa) {
479 break;
480 }
481
90f9f839 482 ovs_numa_dump_add(dump, core->numa->numa_id, core->core_id);
dbedeb9d
DDP
483 }
484 }
485
486 return dump;
487}
488
b2ce05ed
DDP
489bool
490ovs_numa_dump_contains_core(const struct ovs_numa_dump *dump,
491 int numa_id, unsigned core_id)
492{
90f9f839 493 struct ovs_numa_info_core *core;
b2ce05ed
DDP
494
495 HMAP_FOR_EACH_WITH_HASH (core, hmap_node, hash_2words(numa_id, core_id),
90f9f839 496 &dump->cores) {
b2ce05ed
DDP
497 if (core->core_id == core_id && core->numa_id == numa_id) {
498 return true;
499 }
500 }
501
502 return false;
503}
504
90f9f839
DDP
505size_t
506ovs_numa_dump_count(const struct ovs_numa_dump *dump)
507{
508 return hmap_count(&dump->cores);
509}
510
9da2564e
AW
511void
512ovs_numa_dump_destroy(struct ovs_numa_dump *dump)
513{
90f9f839
DDP
514 struct ovs_numa_info_core *c;
515 struct ovs_numa_info_numa *n;
9da2564e 516
93ce5762
DDP
517 if (!dump) {
518 return;
519 }
520
90f9f839
DDP
521 HMAP_FOR_EACH_POP (c, hmap_node, &dump->cores) {
522 free(c);
523 }
524
525 HMAP_FOR_EACH_POP (n, hmap_node, &dump->numas) {
526 free(n);
9da2564e
AW
527 }
528
90f9f839
DDP
529 hmap_destroy(&dump->cores);
530 hmap_destroy(&dump->numas);
b2ce05ed 531
9da2564e
AW
532 free(dump);
533}
534
b4e28b7f 535int ovs_numa_thread_setaffinity_core(unsigned core_id OVS_UNUSED)
6930c7e0 536{
b4e28b7f
DDP
537 if (dummy_numa) {
538 /* Nothing to do */
539 return 0;
540 }
541
6930c7e0
DDP
542#ifdef __linux__
543 cpu_set_t cpuset;
544 int err;
545
546 CPU_ZERO(&cpuset);
547 CPU_SET(core_id, &cpuset);
548 err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
549 if (err) {
550 VLOG_ERR("Thread affinity error %d",err);
551 return err;
552 }
553
554 return 0;
555#else /* !__linux__ */
556 return EOPNOTSUPP;
557#endif /* __linux__ */
558}