]>
Commit | Line | Data |
---|---|---|
7c5a3bbf AW |
1 | /* |
2 | * Copyright (c) 2014 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | /* On non-Linux, these functions are defined inline in ovs-numa.h. */ | |
18 | #ifdef __linux__ | |
19 | ||
20 | #include <config.h> | |
21 | #include "ovs-numa.h" | |
22 | ||
23 | #include <ctype.h> | |
24 | #include <dirent.h> | |
25 | #include <errno.h> | |
26 | #include <stddef.h> | |
27 | #include <string.h> | |
28 | #include <sys/types.h> | |
29 | #include <unistd.h> | |
30 | ||
31 | #include "hash.h" | |
32 | #include "hmap.h" | |
b19bab5b | 33 | #include "openvswitch/list.h" |
7c5a3bbf | 34 | #include "ovs-thread.h" |
e6211adc | 35 | #include "openvswitch/vlog.h" |
7c5a3bbf AW |
36 | |
37 | VLOG_DEFINE_THIS_MODULE(ovs_numa); | |
38 | ||
34185750 AW |
39 | /* ovs-numa module |
40 | * =============== | |
41 | * | |
42 | * This module stores the affinity information of numa nodes and cpu cores. | |
43 | * It also provides functions to track which cpu cores have threads pinned. | |
44 | * | |
45 | * It is assumed that the numa node ids and cpu core ids all start from 0 and | |
46 | * range continuously. So, for example, if 'ovs_numa_get_n_cores()' returns N, | |
47 | * user can assume core ids from 0 to N-1 are all valid and there is a | |
48 | * 'struct cpu_core' for each id. | |
49 | * | |
9da2564e AW |
50 | * NOTE, this module should only be used by the main thread. |
51 | * | |
34185750 AW |
52 | * NOTE, the assumption above will fail when cpu hotplug is used. In that |
53 | * case ovs-numa will not function correctly. For now, add a TODO entry | |
54 | * for addressing it in the future. | |
55 | * | |
56 | * TODO: Fix ovs-numa when cpu hotplug is used. | |
57 | */ | |
58 | ||
012c0a04 | 59 | #define MAX_NUMA_NODES 128 |
7c5a3bbf | 60 | |
012c0a04 AW |
61 | /* numa node. */ |
62 | struct numa_node { | |
63 | struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */ | |
ca6ba700 | 64 | struct ovs_list cores; /* List of cpu cores on the numa node. */ |
012c0a04 | 65 | int numa_id; /* numa node id. */ |
7c5a3bbf AW |
66 | }; |
67 | ||
012c0a04 | 68 | /* Cpu core on a numa node. */ |
7c5a3bbf AW |
69 | struct cpu_core { |
70 | struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */ | |
ca6ba700 | 71 | struct ovs_list list_node; /* In 'numa_node->cores' list. */ |
012c0a04 | 72 | struct numa_node *numa; /* numa node containing the core. */ |
bd5131ba | 73 | unsigned core_id; /* Core id. */ |
8db2f898 | 74 | bool available; /* If the core can be pinned. */ |
7c5a3bbf AW |
75 | bool pinned; /* If a thread has been pinned to the core. */ |
76 | }; | |
77 | ||
012c0a04 AW |
78 | /* Contains all 'struct numa_node's. */ |
79 | static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes); | |
7c5a3bbf AW |
80 | /* Contains all 'struct cpu_core's. */ |
81 | static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores); | |
012c0a04 AW |
82 | /* True if numa node and core info are correctly extracted. */ |
83 | static bool found_numa_and_core; | |
7c5a3bbf AW |
84 | |
/* Returns true when every character of 'str' is a decimal digit; an empty
 * 'str' therefore also yields true.  Returns false otherwise. */
static bool
contain_all_digits(const char *str)
{
    const char *p = str;

    /* Skip the leading run of digits; the string qualifies only if that run
     * reaches the terminating null byte. */
    while (*p >= '0' && *p <= '9') {
        p++;
    }

    return *p == '\0';
}
91 | ||
012c0a04 AW |
92 | /* Discovers all numa nodes and the corresponding cpu cores. |
93 | * Constructs the 'struct numa_node' and 'struct cpu_core'. */ | |
7c5a3bbf | 94 | static void |
012c0a04 | 95 | discover_numa_and_core(void) |
7c5a3bbf AW |
96 | { |
97 | int n_cpus = 0; | |
98 | int i; | |
8ae587b9 IM |
99 | DIR *dir; |
100 | bool numa_supported = true; | |
101 | ||
102 | /* Check if NUMA supported on this system. */ | |
103 | dir = opendir("/sys/devices/system/node"); | |
104 | ||
105 | if (!dir && errno == ENOENT) { | |
106 | numa_supported = false; | |
107 | } | |
108 | if (dir) { | |
109 | closedir(dir); | |
110 | } | |
7c5a3bbf | 111 | |
012c0a04 | 112 | for (i = 0; i < MAX_NUMA_NODES; i++) { |
7c5a3bbf AW |
113 | char* path; |
114 | ||
8ae587b9 IM |
115 | if (numa_supported) { |
116 | /* Constructs the path to node /sys/devices/system/nodeX. */ | |
117 | path = xasprintf("/sys/devices/system/node/node%d", i); | |
118 | } else { | |
119 | path = xasprintf("/sys/devices/system/cpu/"); | |
120 | } | |
121 | ||
7c5a3bbf AW |
122 | dir = opendir(path); |
123 | ||
012c0a04 | 124 | /* Creates 'struct numa_node' if the 'dir' is non-null. */ |
7c5a3bbf | 125 | if (dir) { |
012c0a04 | 126 | struct numa_node *n = xzalloc(sizeof *n); |
7c5a3bbf AW |
127 | struct dirent *subdir; |
128 | ||
012c0a04 AW |
129 | hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(i, 0)); |
130 | list_init(&n->cores); | |
131 | n->numa_id = i; | |
7c5a3bbf AW |
132 | |
133 | while ((subdir = readdir(dir)) != NULL) { | |
134 | if (!strncmp(subdir->d_name, "cpu", 3) | |
135 | && contain_all_digits(subdir->d_name + 3)){ | |
136 | struct cpu_core *c = xzalloc(sizeof *c); | |
bd5131ba | 137 | unsigned core_id; |
7c5a3bbf AW |
138 | |
139 | core_id = strtoul(subdir->d_name + 3, NULL, 10); | |
140 | hmap_insert(&all_cpu_cores, &c->hmap_node, | |
141 | hash_int(core_id, 0)); | |
012c0a04 | 142 | list_insert(&n->cores, &c->list_node); |
7c5a3bbf | 143 | c->core_id = core_id; |
46a14035 | 144 | c->numa = n; |
8db2f898 | 145 | c->available = true; |
7c5a3bbf AW |
146 | n_cpus++; |
147 | } | |
148 | } | |
012c0a04 AW |
149 | VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d", |
150 | list_size(&n->cores), n->numa_id); | |
7c5a3bbf | 151 | closedir(dir); |
8ae587b9 IM |
152 | } else if (errno != ENOENT) { |
153 | VLOG_WARN("opendir(%s) failed (%s)", path, | |
154 | ovs_strerror(errno)); | |
155 | } | |
156 | ||
157 | free(path); | |
158 | if (!dir || !numa_supported) { | |
7c5a3bbf AW |
159 | break; |
160 | } | |
161 | } | |
162 | ||
012c0a04 AW |
163 | VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %d CPU cores", |
164 | hmap_count(&all_numa_nodes), n_cpus); | |
165 | if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) { | |
166 | found_numa_and_core = true; | |
7c5a3bbf AW |
167 | } |
168 | } | |
169 | ||
9da2564e AW |
170 | /* Gets 'struct cpu_core' by 'core_id'. */ |
171 | static struct cpu_core* | |
bd5131ba | 172 | get_core_by_core_id(unsigned core_id) |
9da2564e AW |
173 | { |
174 | struct cpu_core *core = NULL; | |
175 | ||
176 | if (ovs_numa_core_id_is_valid(core_id)) { | |
177 | core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, | |
178 | hash_int(core_id, 0)), | |
179 | struct cpu_core, hmap_node); | |
180 | } | |
181 | ||
182 | return core; | |
183 | } | |
184 | ||
185 | /* Gets 'struct numa_node' by 'numa_id'. */ | |
186 | static struct numa_node* | |
187 | get_numa_by_numa_id(int numa_id) | |
188 | { | |
189 | struct numa_node *numa = NULL; | |
190 | ||
191 | if (ovs_numa_numa_id_is_valid(numa_id)) { | |
192 | numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes, | |
193 | hash_int(numa_id, 0)), | |
194 | struct numa_node, hmap_node); | |
195 | } | |
196 | ||
197 | return numa; | |
198 | } | |
199 | ||
200 | \f | |
7c5a3bbf AW |
201 | /* Extracts the numa node and core info from the 'sysfs'. */ |
202 | void | |
203 | ovs_numa_init(void) | |
204 | { | |
205 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
206 | ||
207 | if (ovsthread_once_start(&once)) { | |
012c0a04 | 208 | discover_numa_and_core(); |
7c5a3bbf AW |
209 | ovsthread_once_done(&once); |
210 | } | |
211 | } | |
212 | ||
213 | bool | |
012c0a04 | 214 | ovs_numa_numa_id_is_valid(int numa_id) |
7c5a3bbf | 215 | { |
421aa227 | 216 | return found_numa_and_core && numa_id < ovs_numa_get_n_numas(); |
7c5a3bbf AW |
217 | } |
218 | ||
219 | bool | |
bd5131ba | 220 | ovs_numa_core_id_is_valid(unsigned core_id) |
7c5a3bbf | 221 | { |
421aa227 | 222 | return found_numa_and_core && core_id < ovs_numa_get_n_cores(); |
7c5a3bbf AW |
223 | } |
224 | ||
9da2564e | 225 | bool |
bd5131ba | 226 | ovs_numa_core_is_pinned(unsigned core_id) |
9da2564e AW |
227 | { |
228 | struct cpu_core *core = get_core_by_core_id(core_id); | |
229 | ||
230 | if (core) { | |
231 | return core->pinned; | |
232 | } | |
233 | ||
234 | return false; | |
235 | } | |
236 | ||
012c0a04 | 237 | /* Returns the number of numa nodes. */ |
7c5a3bbf | 238 | int |
012c0a04 | 239 | ovs_numa_get_n_numas(void) |
7c5a3bbf | 240 | { |
012c0a04 AW |
241 | return found_numa_and_core ? hmap_count(&all_numa_nodes) |
242 | : OVS_NUMA_UNSPEC; | |
7c5a3bbf AW |
243 | } |
244 | ||
245 | /* Returns the number of cpu cores. */ | |
246 | int | |
247 | ovs_numa_get_n_cores(void) | |
248 | { | |
012c0a04 AW |
249 | return found_numa_and_core ? hmap_count(&all_cpu_cores) |
250 | : OVS_CORE_UNSPEC; | |
7c5a3bbf AW |
251 | } |
252 | ||
6b1105fb AW |
253 | /* Given 'core_id', returns the corresponding numa node id. Returns |
254 | * OVS_NUMA_UNSPEC if 'core_id' is invalid. */ | |
255 | int | |
bd5131ba | 256 | ovs_numa_get_numa_id(unsigned core_id) |
6b1105fb | 257 | { |
9da2564e | 258 | struct cpu_core *core = get_core_by_core_id(core_id); |
6b1105fb | 259 | |
9da2564e | 260 | if (core) { |
6b1105fb AW |
261 | return core->numa->numa_id; |
262 | } | |
9da2564e | 263 | |
6b1105fb AW |
264 | return OVS_NUMA_UNSPEC; |
265 | } | |
266 | ||
421aa227 AW |
267 | /* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC |
268 | * if 'numa_id' is invalid. */ | |
7c5a3bbf | 269 | int |
012c0a04 | 270 | ovs_numa_get_n_cores_on_numa(int numa_id) |
7c5a3bbf | 271 | { |
9da2564e | 272 | struct numa_node *numa = get_numa_by_numa_id(numa_id); |
7c5a3bbf | 273 | |
9da2564e | 274 | if (numa) { |
012c0a04 | 275 | return list_size(&numa->cores); |
7c5a3bbf AW |
276 | } |
277 | ||
278 | return OVS_CORE_UNSPEC; | |
279 | } | |
280 | ||
8db2f898 AW |
281 | /* Returns the number of cpu cores that are available and unpinned |
282 | * on numa node. Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */ | |
7c5a3bbf | 283 | int |
012c0a04 | 284 | ovs_numa_get_n_unpinned_cores_on_numa(int numa_id) |
7c5a3bbf | 285 | { |
9da2564e AW |
286 | struct numa_node *numa = get_numa_by_numa_id(numa_id); |
287 | ||
288 | if (numa) { | |
7c5a3bbf AW |
289 | struct cpu_core *core; |
290 | int count = 0; | |
291 | ||
012c0a04 | 292 | LIST_FOR_EACH(core, list_node, &numa->cores) { |
8db2f898 | 293 | if (core->available && !core->pinned) { |
7c5a3bbf AW |
294 | count++; |
295 | } | |
296 | } | |
7c5a3bbf AW |
297 | return count; |
298 | } | |
299 | ||
300 | return OVS_CORE_UNSPEC; | |
301 | } | |
302 | ||
303 | /* Given 'core_id', tries to pin that core. Returns true, if succeeds. | |
8db2f898 AW |
304 | * False, if the core has already been pinned, or if it is invalid or |
305 | * not available. */ | |
7c5a3bbf | 306 | bool |
bd5131ba | 307 | ovs_numa_try_pin_core_specific(unsigned core_id) |
7c5a3bbf | 308 | { |
9da2564e | 309 | struct cpu_core *core = get_core_by_core_id(core_id); |
7c5a3bbf | 310 | |
9da2564e | 311 | if (core) { |
8db2f898 | 312 | if (core->available && !core->pinned) { |
421aa227 AW |
313 | core->pinned = true; |
314 | return true; | |
315 | } | |
7c5a3bbf AW |
316 | } |
317 | ||
318 | return false; | |
319 | } | |
320 | ||
8db2f898 AW |
321 | /* Searches through all cores for an unpinned and available core. Returns |
322 | * the 'core_id' if found and sets the 'core->pinned' to true. Otherwise, | |
323 | * returns OVS_CORE_UNSPEC. */ | |
bd5131ba | 324 | unsigned |
7c5a3bbf AW |
325 | ovs_numa_get_unpinned_core_any(void) |
326 | { | |
327 | struct cpu_core *core; | |
328 | ||
329 | HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) { | |
8db2f898 | 330 | if (core->available && !core->pinned) { |
7c5a3bbf AW |
331 | core->pinned = true; |
332 | return core->core_id; | |
333 | } | |
334 | } | |
335 | ||
336 | return OVS_CORE_UNSPEC; | |
337 | } | |
338 | ||
8db2f898 AW |
339 | /* Searches through all cores on numa node with 'numa_id' for an |
340 | * unpinned and available core. Returns the core_id if found and | |
341 | * sets the 'core->pinned' to true. Otherwise, returns OVS_CORE_UNSPEC. */ | |
bd5131ba | 342 | unsigned |
012c0a04 | 343 | ovs_numa_get_unpinned_core_on_numa(int numa_id) |
7c5a3bbf | 344 | { |
9da2564e AW |
345 | struct numa_node *numa = get_numa_by_numa_id(numa_id); |
346 | ||
347 | if (numa) { | |
421aa227 | 348 | struct cpu_core *core; |
7c5a3bbf | 349 | |
421aa227 | 350 | LIST_FOR_EACH(core, list_node, &numa->cores) { |
8db2f898 | 351 | if (core->available && !core->pinned) { |
421aa227 AW |
352 | core->pinned = true; |
353 | return core->core_id; | |
354 | } | |
7c5a3bbf AW |
355 | } |
356 | } | |
357 | ||
358 | return OVS_CORE_UNSPEC; | |
359 | } | |
360 | ||
8db2f898 | 361 | /* Unpins the core with 'core_id'. */ |
7c5a3bbf | 362 | void |
bd5131ba | 363 | ovs_numa_unpin_core(unsigned core_id) |
7c5a3bbf | 364 | { |
9da2564e | 365 | struct cpu_core *core = get_core_by_core_id(core_id); |
7c5a3bbf | 366 | |
9da2564e | 367 | if (core) { |
421aa227 AW |
368 | core->pinned = false; |
369 | } | |
7c5a3bbf AW |
370 | } |
371 | ||
9da2564e AW |
372 | /* Given the 'numa_id', returns dump of all cores on the numa node. */ |
373 | struct ovs_numa_dump * | |
374 | ovs_numa_dump_cores_on_numa(int numa_id) | |
375 | { | |
376 | struct ovs_numa_dump *dump = NULL; | |
377 | struct numa_node *numa = get_numa_by_numa_id(numa_id); | |
378 | ||
379 | if (numa) { | |
380 | struct cpu_core *core; | |
381 | ||
382 | dump = xmalloc(sizeof *dump); | |
383 | list_init(&dump->dump); | |
384 | LIST_FOR_EACH(core, list_node, &numa->cores) { | |
385 | struct ovs_numa_info *info = xmalloc(sizeof *info); | |
386 | ||
387 | info->numa_id = numa->numa_id; | |
388 | info->core_id = core->core_id; | |
389 | list_insert(&dump->dump, &info->list_node); | |
390 | } | |
391 | } | |
392 | ||
393 | return dump; | |
394 | } | |
395 | ||
396 | void | |
397 | ovs_numa_dump_destroy(struct ovs_numa_dump *dump) | |
398 | { | |
5f03c983 | 399 | struct ovs_numa_info *iter; |
9da2564e | 400 | |
5f03c983 | 401 | LIST_FOR_EACH_POP (iter, list_node, &dump->dump) { |
9da2564e AW |
402 | free(iter); |
403 | } | |
404 | ||
405 | free(dump); | |
406 | } | |
407 | ||
8db2f898 AW |
408 | /* Reads the cpu mask configuration from 'cmask' and sets the |
409 | * 'available' of corresponding cores. For unspecified cores, | |
410 | * sets 'available' to false. */ | |
411 | void | |
412 | ovs_numa_set_cpu_mask(const char *cmask) | |
413 | { | |
414 | int core_id = 0; | |
415 | int i; | |
416 | ||
417 | if (!found_numa_and_core) { | |
418 | return; | |
419 | } | |
420 | ||
421 | /* If no mask specified, resets the 'available' to true for all cores. */ | |
422 | if (!cmask) { | |
423 | struct cpu_core *core; | |
424 | ||
425 | HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) { | |
426 | core->available = true; | |
427 | } | |
428 | ||
429 | return; | |
430 | } | |
431 | ||
432 | for (i = strlen(cmask) - 1; i >= 0; i--) { | |
433 | char hex = toupper(cmask[i]); | |
434 | int bin, j; | |
435 | ||
436 | if (hex >= '0' && hex <= '9') { | |
437 | bin = hex - '0'; | |
438 | } else if (hex >= 'A' && hex <= 'F') { | |
439 | bin = hex - 'A' + 10; | |
440 | } else { | |
441 | bin = 0; | |
442 | VLOG_WARN("Invalid cpu mask: %c", cmask[i]); | |
443 | } | |
444 | ||
445 | for (j = 0; j < 4; j++) { | |
446 | struct cpu_core *core; | |
447 | ||
448 | core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, | |
449 | hash_int(core_id++, 0)), | |
450 | struct cpu_core, hmap_node); | |
451 | core->available = (bin >> j) & 0x1; | |
452 | ||
453 | if (core_id >= hmap_count(&all_cpu_cores)) { | |
454 | return; | |
455 | } | |
456 | } | |
457 | } | |
458 | ||
459 | /* For unspecified cores, sets 'available' to false. */ | |
460 | while (core_id < hmap_count(&all_cpu_cores)) { | |
461 | struct cpu_core *core; | |
462 | ||
463 | core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, | |
464 | hash_int(core_id++, 0)), | |
465 | struct cpu_core, hmap_node); | |
466 | core->available = false; | |
467 | } | |
468 | } | |
469 | ||
7c5a3bbf | 470 | #endif /* __linux__ */ |