]> git.proxmox.com Git - mirror_ovs.git/blame - lib/ovs-numa.c
list: Remove lib/list.h completely.
[mirror_ovs.git] / lib / ovs-numa.c
CommitLineData
7c5a3bbf
AW
1/*
2 * Copyright (c) 2014 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/* On non-Linux, these functions are defined inline in ovs-numa.h. */
18#ifdef __linux__
19
20#include <config.h>
21#include "ovs-numa.h"
22
23#include <ctype.h>
24#include <dirent.h>
25#include <errno.h>
26#include <stddef.h>
27#include <string.h>
28#include <sys/types.h>
29#include <unistd.h>
30
31#include "hash.h"
32#include "hmap.h"
b19bab5b 33#include "openvswitch/list.h"
7c5a3bbf 34#include "ovs-thread.h"
e6211adc 35#include "openvswitch/vlog.h"
7c5a3bbf
AW
36
37VLOG_DEFINE_THIS_MODULE(ovs_numa);
38
34185750
AW
/* ovs-numa module
 * ===============
 *
 * This module stores the affinity information of numa nodes and cpu cores.
 * It also provides functions to keep track of which cpu cores have threads
 * pinned to them.
 *
 * It is assumed that the numa node ids and cpu core ids all start from 0 and
 * range continuously.  So, for example, if 'ovs_numa_get_n_cores()' returns N,
 * user can assume core ids from 0 to N-1 are all valid and there is a
 * 'struct cpu_core' for each id.
 *
 * NOTE, this module should only be used by the main thread.
 *
 * NOTE, the assumption above will fail when cpu hotplug is used.  In that
 * case ovs-numa will not function correctly.  For now, add a TODO entry
 * for addressing it in the future.
 *
 * TODO: Fix ovs-numa when cpu hotplug is used.
 */
58
/* Upper bound on the number of "nodeN" sysfs directories probed during
 * discovery. */
#define MAX_NUMA_NODES 128
7c5a3bbf 60
012c0a04
AW
61/* numa node. */
62struct numa_node {
63 struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */
ca6ba700 64 struct ovs_list cores; /* List of cpu cores on the numa node. */
012c0a04 65 int numa_id; /* numa node id. */
7c5a3bbf
AW
66};
67
012c0a04 68/* Cpu core on a numa node. */
7c5a3bbf
AW
69struct cpu_core {
70 struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */
ca6ba700 71 struct ovs_list list_node; /* In 'numa_node->cores' list. */
012c0a04 72 struct numa_node *numa; /* numa node containing the core. */
bd5131ba 73 unsigned core_id; /* Core id. */
8db2f898 74 bool available; /* If the core can be pinned. */
7c5a3bbf
AW
75 bool pinned; /* If a thread has been pinned to the core. */
76};
77
012c0a04
AW
78/* Contains all 'struct numa_node's. */
79static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes);
7c5a3bbf
AW
80/* Contains all 'struct cpu_core's. */
81static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores);
012c0a04
AW
82/* True if numa node and core info are correctly extracted. */
83static bool found_numa_and_core;
7c5a3bbf
AW
84
/* Returns true if 'str' is non-empty and consists solely of the decimal
 * digits '0'..'9'.  Returns false otherwise.
 *
 * The empty string is rejected on purpose: the caller uses this to validate
 * the numeric suffix of "cpuN" directory names before converting it with
 * strtoul(), and an empty suffix must not be mistaken for core id 0. */
static bool
contain_all_digits(const char *str)
{
    return str[0] != '\0' && str[strspn(str, "0123456789")] == '\0';
}
91
012c0a04
AW
92/* Discovers all numa nodes and the corresponding cpu cores.
93 * Constructs the 'struct numa_node' and 'struct cpu_core'. */
7c5a3bbf 94static void
012c0a04 95discover_numa_and_core(void)
7c5a3bbf
AW
96{
97 int n_cpus = 0;
98 int i;
8ae587b9
IM
99 DIR *dir;
100 bool numa_supported = true;
101
102 /* Check if NUMA supported on this system. */
103 dir = opendir("/sys/devices/system/node");
104
105 if (!dir && errno == ENOENT) {
106 numa_supported = false;
107 }
108 if (dir) {
109 closedir(dir);
110 }
7c5a3bbf 111
012c0a04 112 for (i = 0; i < MAX_NUMA_NODES; i++) {
7c5a3bbf
AW
113 char* path;
114
8ae587b9
IM
115 if (numa_supported) {
116 /* Constructs the path to node /sys/devices/system/nodeX. */
117 path = xasprintf("/sys/devices/system/node/node%d", i);
118 } else {
119 path = xasprintf("/sys/devices/system/cpu/");
120 }
121
7c5a3bbf
AW
122 dir = opendir(path);
123
012c0a04 124 /* Creates 'struct numa_node' if the 'dir' is non-null. */
7c5a3bbf 125 if (dir) {
012c0a04 126 struct numa_node *n = xzalloc(sizeof *n);
7c5a3bbf
AW
127 struct dirent *subdir;
128
012c0a04
AW
129 hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(i, 0));
130 list_init(&n->cores);
131 n->numa_id = i;
7c5a3bbf
AW
132
133 while ((subdir = readdir(dir)) != NULL) {
134 if (!strncmp(subdir->d_name, "cpu", 3)
135 && contain_all_digits(subdir->d_name + 3)){
136 struct cpu_core *c = xzalloc(sizeof *c);
bd5131ba 137 unsigned core_id;
7c5a3bbf
AW
138
139 core_id = strtoul(subdir->d_name + 3, NULL, 10);
140 hmap_insert(&all_cpu_cores, &c->hmap_node,
141 hash_int(core_id, 0));
012c0a04 142 list_insert(&n->cores, &c->list_node);
7c5a3bbf 143 c->core_id = core_id;
46a14035 144 c->numa = n;
8db2f898 145 c->available = true;
7c5a3bbf
AW
146 n_cpus++;
147 }
148 }
012c0a04
AW
149 VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d",
150 list_size(&n->cores), n->numa_id);
7c5a3bbf 151 closedir(dir);
8ae587b9
IM
152 } else if (errno != ENOENT) {
153 VLOG_WARN("opendir(%s) failed (%s)", path,
154 ovs_strerror(errno));
155 }
156
157 free(path);
158 if (!dir || !numa_supported) {
7c5a3bbf
AW
159 break;
160 }
161 }
162
012c0a04
AW
163 VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %d CPU cores",
164 hmap_count(&all_numa_nodes), n_cpus);
165 if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) {
166 found_numa_and_core = true;
7c5a3bbf
AW
167 }
168}
169
9da2564e
AW
170/* Gets 'struct cpu_core' by 'core_id'. */
171static struct cpu_core*
bd5131ba 172get_core_by_core_id(unsigned core_id)
9da2564e
AW
173{
174 struct cpu_core *core = NULL;
175
176 if (ovs_numa_core_id_is_valid(core_id)) {
177 core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
178 hash_int(core_id, 0)),
179 struct cpu_core, hmap_node);
180 }
181
182 return core;
183}
184
185/* Gets 'struct numa_node' by 'numa_id'. */
186static struct numa_node*
187get_numa_by_numa_id(int numa_id)
188{
189 struct numa_node *numa = NULL;
190
191 if (ovs_numa_numa_id_is_valid(numa_id)) {
192 numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes,
193 hash_int(numa_id, 0)),
194 struct numa_node, hmap_node);
195 }
196
197 return numa;
198}
199
200\f
7c5a3bbf
AW
201/* Extracts the numa node and core info from the 'sysfs'. */
202void
203ovs_numa_init(void)
204{
205 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
206
207 if (ovsthread_once_start(&once)) {
012c0a04 208 discover_numa_and_core();
7c5a3bbf
AW
209 ovsthread_once_done(&once);
210 }
211}
212
213bool
012c0a04 214ovs_numa_numa_id_is_valid(int numa_id)
7c5a3bbf 215{
421aa227 216 return found_numa_and_core && numa_id < ovs_numa_get_n_numas();
7c5a3bbf
AW
217}
218
219bool
bd5131ba 220ovs_numa_core_id_is_valid(unsigned core_id)
7c5a3bbf 221{
421aa227 222 return found_numa_and_core && core_id < ovs_numa_get_n_cores();
7c5a3bbf
AW
223}
224
9da2564e 225bool
bd5131ba 226ovs_numa_core_is_pinned(unsigned core_id)
9da2564e
AW
227{
228 struct cpu_core *core = get_core_by_core_id(core_id);
229
230 if (core) {
231 return core->pinned;
232 }
233
234 return false;
235}
236
012c0a04 237/* Returns the number of numa nodes. */
7c5a3bbf 238int
012c0a04 239ovs_numa_get_n_numas(void)
7c5a3bbf 240{
012c0a04
AW
241 return found_numa_and_core ? hmap_count(&all_numa_nodes)
242 : OVS_NUMA_UNSPEC;
7c5a3bbf
AW
243}
244
245/* Returns the number of cpu cores. */
246int
247ovs_numa_get_n_cores(void)
248{
012c0a04
AW
249 return found_numa_and_core ? hmap_count(&all_cpu_cores)
250 : OVS_CORE_UNSPEC;
7c5a3bbf
AW
251}
252
6b1105fb
AW
253/* Given 'core_id', returns the corresponding numa node id. Returns
254 * OVS_NUMA_UNSPEC if 'core_id' is invalid. */
255int
bd5131ba 256ovs_numa_get_numa_id(unsigned core_id)
6b1105fb 257{
9da2564e 258 struct cpu_core *core = get_core_by_core_id(core_id);
6b1105fb 259
9da2564e 260 if (core) {
6b1105fb
AW
261 return core->numa->numa_id;
262 }
9da2564e 263
6b1105fb
AW
264 return OVS_NUMA_UNSPEC;
265}
266
421aa227
AW
267/* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC
268 * if 'numa_id' is invalid. */
7c5a3bbf 269int
012c0a04 270ovs_numa_get_n_cores_on_numa(int numa_id)
7c5a3bbf 271{
9da2564e 272 struct numa_node *numa = get_numa_by_numa_id(numa_id);
7c5a3bbf 273
9da2564e 274 if (numa) {
012c0a04 275 return list_size(&numa->cores);
7c5a3bbf
AW
276 }
277
278 return OVS_CORE_UNSPEC;
279}
280
8db2f898
AW
281/* Returns the number of cpu cores that are available and unpinned
282 * on numa node. Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */
7c5a3bbf 283int
012c0a04 284ovs_numa_get_n_unpinned_cores_on_numa(int numa_id)
7c5a3bbf 285{
9da2564e
AW
286 struct numa_node *numa = get_numa_by_numa_id(numa_id);
287
288 if (numa) {
7c5a3bbf
AW
289 struct cpu_core *core;
290 int count = 0;
291
012c0a04 292 LIST_FOR_EACH(core, list_node, &numa->cores) {
8db2f898 293 if (core->available && !core->pinned) {
7c5a3bbf
AW
294 count++;
295 }
296 }
7c5a3bbf
AW
297 return count;
298 }
299
300 return OVS_CORE_UNSPEC;
301}
302
303/* Given 'core_id', tries to pin that core. Returns true, if succeeds.
8db2f898
AW
304 * False, if the core has already been pinned, or if it is invalid or
305 * not available. */
7c5a3bbf 306bool
bd5131ba 307ovs_numa_try_pin_core_specific(unsigned core_id)
7c5a3bbf 308{
9da2564e 309 struct cpu_core *core = get_core_by_core_id(core_id);
7c5a3bbf 310
9da2564e 311 if (core) {
8db2f898 312 if (core->available && !core->pinned) {
421aa227
AW
313 core->pinned = true;
314 return true;
315 }
7c5a3bbf
AW
316 }
317
318 return false;
319}
320
8db2f898
AW
321/* Searches through all cores for an unpinned and available core. Returns
322 * the 'core_id' if found and sets the 'core->pinned' to true. Otherwise,
323 * returns OVS_CORE_UNSPEC. */
bd5131ba 324unsigned
7c5a3bbf
AW
325ovs_numa_get_unpinned_core_any(void)
326{
327 struct cpu_core *core;
328
329 HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
8db2f898 330 if (core->available && !core->pinned) {
7c5a3bbf
AW
331 core->pinned = true;
332 return core->core_id;
333 }
334 }
335
336 return OVS_CORE_UNSPEC;
337}
338
8db2f898
AW
339/* Searches through all cores on numa node with 'numa_id' for an
340 * unpinned and available core. Returns the core_id if found and
341 * sets the 'core->pinned' to true. Otherwise, returns OVS_CORE_UNSPEC. */
bd5131ba 342unsigned
012c0a04 343ovs_numa_get_unpinned_core_on_numa(int numa_id)
7c5a3bbf 344{
9da2564e
AW
345 struct numa_node *numa = get_numa_by_numa_id(numa_id);
346
347 if (numa) {
421aa227 348 struct cpu_core *core;
7c5a3bbf 349
421aa227 350 LIST_FOR_EACH(core, list_node, &numa->cores) {
8db2f898 351 if (core->available && !core->pinned) {
421aa227
AW
352 core->pinned = true;
353 return core->core_id;
354 }
7c5a3bbf
AW
355 }
356 }
357
358 return OVS_CORE_UNSPEC;
359}
360
8db2f898 361/* Unpins the core with 'core_id'. */
7c5a3bbf 362void
bd5131ba 363ovs_numa_unpin_core(unsigned core_id)
7c5a3bbf 364{
9da2564e 365 struct cpu_core *core = get_core_by_core_id(core_id);
7c5a3bbf 366
9da2564e 367 if (core) {
421aa227
AW
368 core->pinned = false;
369 }
7c5a3bbf
AW
370}
371
9da2564e
AW
372/* Given the 'numa_id', returns dump of all cores on the numa node. */
373struct ovs_numa_dump *
374ovs_numa_dump_cores_on_numa(int numa_id)
375{
376 struct ovs_numa_dump *dump = NULL;
377 struct numa_node *numa = get_numa_by_numa_id(numa_id);
378
379 if (numa) {
380 struct cpu_core *core;
381
382 dump = xmalloc(sizeof *dump);
383 list_init(&dump->dump);
384 LIST_FOR_EACH(core, list_node, &numa->cores) {
385 struct ovs_numa_info *info = xmalloc(sizeof *info);
386
387 info->numa_id = numa->numa_id;
388 info->core_id = core->core_id;
389 list_insert(&dump->dump, &info->list_node);
390 }
391 }
392
393 return dump;
394}
395
396void
397ovs_numa_dump_destroy(struct ovs_numa_dump *dump)
398{
5f03c983 399 struct ovs_numa_info *iter;
9da2564e 400
5f03c983 401 LIST_FOR_EACH_POP (iter, list_node, &dump->dump) {
9da2564e
AW
402 free(iter);
403 }
404
405 free(dump);
406}
407
8db2f898
AW
408/* Reads the cpu mask configuration from 'cmask' and sets the
409 * 'available' of corresponding cores. For unspecified cores,
410 * sets 'available' to false. */
411void
412ovs_numa_set_cpu_mask(const char *cmask)
413{
414 int core_id = 0;
415 int i;
416
417 if (!found_numa_and_core) {
418 return;
419 }
420
421 /* If no mask specified, resets the 'available' to true for all cores. */
422 if (!cmask) {
423 struct cpu_core *core;
424
425 HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) {
426 core->available = true;
427 }
428
429 return;
430 }
431
432 for (i = strlen(cmask) - 1; i >= 0; i--) {
433 char hex = toupper(cmask[i]);
434 int bin, j;
435
436 if (hex >= '0' && hex <= '9') {
437 bin = hex - '0';
438 } else if (hex >= 'A' && hex <= 'F') {
439 bin = hex - 'A' + 10;
440 } else {
441 bin = 0;
442 VLOG_WARN("Invalid cpu mask: %c", cmask[i]);
443 }
444
445 for (j = 0; j < 4; j++) {
446 struct cpu_core *core;
447
448 core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
449 hash_int(core_id++, 0)),
450 struct cpu_core, hmap_node);
451 core->available = (bin >> j) & 0x1;
452
453 if (core_id >= hmap_count(&all_cpu_cores)) {
454 return;
455 }
456 }
457 }
458
459 /* For unspecified cores, sets 'available' to false. */
460 while (core_id < hmap_count(&all_cpu_cores)) {
461 struct cpu_core *core;
462
463 core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores,
464 hash_int(core_id++, 0)),
465 struct cpu_core, hmap_node);
466 core->available = false;
467 }
468}
469
7c5a3bbf 470#endif /* __linux__ */