]>
Commit | Line | Data |
---|---|---|
7c5a3bbf AW |
1 | /* |
2 | * Copyright (c) 2014 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | /* On non-Linux, these functions are defined inline in ovs-numa.h. */ | |
18 | #ifdef __linux__ | |
19 | ||
20 | #include <config.h> | |
21 | #include "ovs-numa.h" | |
22 | ||
23 | #include <ctype.h> | |
24 | #include <dirent.h> | |
25 | #include <errno.h> | |
26 | #include <stddef.h> | |
27 | #include <string.h> | |
28 | #include <sys/types.h> | |
29 | #include <unistd.h> | |
30 | ||
31 | #include "hash.h" | |
32 | #include "hmap.h" | |
33 | #include "list.h" | |
34 | #include "ovs-thread.h" | |
e6211adc | 35 | #include "openvswitch/vlog.h" |
7c5a3bbf AW |
36 | |
37 | VLOG_DEFINE_THIS_MODULE(ovs_numa); | |
38 | ||
34185750 AW |
39 | /* ovs-numa module |
40 | * =============== | |
41 | * | |
42 | * This module stores the affinity information of numa nodes and cpu cores. | |
43 | * It also provides functions to track which cpu cores have threads pinned. | |
44 | * | |
45 | * It is assumed that the numa node ids and cpu core ids all start from 0 and | |
46 | * range continuously. So, for example, if 'ovs_numa_get_n_cores()' returns N, | |
47 | * user can assume core ids from 0 to N-1 are all valid and there is a | |
48 | * 'struct cpu_core' for each id. | |
49 | * | |
50 | * NOTE, the assumption above will fail when cpu hotplug is used. In that | |
51 | * case ovs-numa will not function correctly. For now, add a TODO entry | |
52 | * for addressing it in the future. | |
53 | * | |
54 | * TODO: Fix ovs-numa when cpu hotplug is used. | |
55 | */ | |
56 | ||
/* Upper bound on the number of numa node ids probed during discovery. */
#define MAX_NUMA_NODES 128
7c5a3bbf | 58 | |
012c0a04 AW |
59 | /* numa node. */ |
60 | struct numa_node { | |
61 | struct hmap_node hmap_node; /* In the 'all_numa_nodes'. */ | |
ca6ba700 | 62 | struct ovs_list cores; /* List of cpu cores on the numa node. */ |
012c0a04 | 63 | int numa_id; /* numa node id. */ |
7c5a3bbf AW |
64 | }; |
65 | ||
012c0a04 | 66 | /* Cpu core on a numa node. */ |
7c5a3bbf AW |
67 | struct cpu_core { |
68 | struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */ | |
ca6ba700 | 69 | struct ovs_list list_node; /* In 'numa_node->cores' list. */ |
012c0a04 | 70 | struct numa_node *numa; /* numa node containing the core. */ |
7c5a3bbf | 71 | int core_id; /* Core id. */ |
8db2f898 | 72 | bool available; /* If the core can be pinned. */ |
7c5a3bbf AW |
73 | bool pinned; /* If a thread has been pinned to the core. */ |
74 | }; | |
75 | ||
012c0a04 AW |
76 | /* Contains all 'struct numa_node's. */ |
77 | static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes); | |
7c5a3bbf AW |
78 | /* Contains all 'struct cpu_core's. */ |
79 | static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores); | |
012c0a04 AW |
80 | /* True if numa node and core info are correctly extracted. */ |
81 | static bool found_numa_and_core; | |
7c5a3bbf AW |
82 | |
/* Returns true if every character of 'str' is a decimal digit ('0'-'9').
 * An empty string has no offending character and therefore also yields
 * true.  Returns false as soon as a non-digit character is seen. */
static bool
contain_all_digits(const char *str)
{
    const char *p;

    for (p = str; *p != '\0'; p++) {
        if (*p < '0' || *p > '9') {
            return false;
        }
    }

    return true;
}
89 | ||
012c0a04 AW |
90 | /* Discovers all numa nodes and the corresponding cpu cores. |
91 | * Constructs the 'struct numa_node' and 'struct cpu_core'. */ | |
7c5a3bbf | 92 | static void |
012c0a04 | 93 | discover_numa_and_core(void) |
7c5a3bbf AW |
94 | { |
95 | int n_cpus = 0; | |
96 | int i; | |
97 | ||
012c0a04 | 98 | for (i = 0; i < MAX_NUMA_NODES; i++) { |
7c5a3bbf AW |
99 | DIR *dir; |
100 | char* path; | |
101 | ||
102 | /* Constructs the path to node /sys/devices/system/nodeX. */ | |
103 | path = xasprintf("/sys/devices/system/node/node%d", i); | |
104 | dir = opendir(path); | |
105 | ||
012c0a04 | 106 | /* Creates 'struct numa_node' if the 'dir' is non-null. */ |
7c5a3bbf | 107 | if (dir) { |
012c0a04 | 108 | struct numa_node *n = xzalloc(sizeof *n); |
7c5a3bbf AW |
109 | struct dirent *subdir; |
110 | ||
012c0a04 AW |
111 | hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(i, 0)); |
112 | list_init(&n->cores); | |
113 | n->numa_id = i; | |
7c5a3bbf AW |
114 | |
115 | while ((subdir = readdir(dir)) != NULL) { | |
116 | if (!strncmp(subdir->d_name, "cpu", 3) | |
117 | && contain_all_digits(subdir->d_name + 3)){ | |
118 | struct cpu_core *c = xzalloc(sizeof *c); | |
119 | uint32_t core_id; | |
120 | ||
121 | core_id = strtoul(subdir->d_name + 3, NULL, 10); | |
122 | hmap_insert(&all_cpu_cores, &c->hmap_node, | |
123 | hash_int(core_id, 0)); | |
012c0a04 | 124 | list_insert(&n->cores, &c->list_node); |
7c5a3bbf | 125 | c->core_id = core_id; |
46a14035 | 126 | c->numa = n; |
8db2f898 | 127 | c->available = true; |
7c5a3bbf AW |
128 | n_cpus++; |
129 | } | |
130 | } | |
012c0a04 AW |
131 | VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d", |
132 | list_size(&n->cores), n->numa_id); | |
7c5a3bbf AW |
133 | free(path); |
134 | closedir(dir); | |
135 | } else { | |
136 | if (errno != ENOENT) { | |
137 | VLOG_WARN("opendir(%s) failed (%s)", path, | |
138 | ovs_strerror(errno)); | |
139 | } | |
140 | free(path); | |
141 | break; | |
142 | } | |
143 | } | |
144 | ||
012c0a04 AW |
145 | VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %d CPU cores", |
146 | hmap_count(&all_numa_nodes), n_cpus); | |
147 | if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) { | |
148 | found_numa_and_core = true; | |
7c5a3bbf AW |
149 | } |
150 | } | |
151 | ||
152 | /* Extracts the numa node and core info from the 'sysfs'. */ | |
153 | void | |
154 | ovs_numa_init(void) | |
155 | { | |
156 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
157 | ||
158 | if (ovsthread_once_start(&once)) { | |
012c0a04 | 159 | discover_numa_and_core(); |
7c5a3bbf AW |
160 | ovsthread_once_done(&once); |
161 | } | |
162 | } | |
163 | ||
164 | bool | |
012c0a04 | 165 | ovs_numa_numa_id_is_valid(int numa_id) |
7c5a3bbf | 166 | { |
421aa227 | 167 | return found_numa_and_core && numa_id < ovs_numa_get_n_numas(); |
7c5a3bbf AW |
168 | } |
169 | ||
170 | bool | |
012c0a04 | 171 | ovs_numa_core_id_is_valid(int core_id) |
7c5a3bbf | 172 | { |
421aa227 | 173 | return found_numa_and_core && core_id < ovs_numa_get_n_cores(); |
7c5a3bbf AW |
174 | } |
175 | ||
012c0a04 | 176 | /* Returns the number of numa nodes. */ |
7c5a3bbf | 177 | int |
012c0a04 | 178 | ovs_numa_get_n_numas(void) |
7c5a3bbf | 179 | { |
012c0a04 AW |
180 | return found_numa_and_core ? hmap_count(&all_numa_nodes) |
181 | : OVS_NUMA_UNSPEC; | |
7c5a3bbf AW |
182 | } |
183 | ||
184 | /* Returns the number of cpu cores. */ | |
185 | int | |
186 | ovs_numa_get_n_cores(void) | |
187 | { | |
012c0a04 AW |
188 | return found_numa_and_core ? hmap_count(&all_cpu_cores) |
189 | : OVS_CORE_UNSPEC; | |
7c5a3bbf AW |
190 | } |
191 | ||
6b1105fb AW |
192 | /* Given 'core_id', returns the corresponding numa node id. Returns |
193 | * OVS_NUMA_UNSPEC if 'core_id' is invalid. */ | |
194 | int | |
195 | ovs_numa_get_numa_id(int core_id) | |
196 | { | |
197 | if (ovs_numa_core_id_is_valid(core_id)) { | |
198 | struct cpu_core *core; | |
199 | ||
200 | core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, | |
201 | hash_int(core_id, 0)), | |
202 | struct cpu_core, hmap_node); | |
203 | ||
204 | return core->numa->numa_id; | |
205 | } | |
206 | return OVS_NUMA_UNSPEC; | |
207 | } | |
208 | ||
421aa227 AW |
209 | /* Returns the number of cpu cores on numa node. Returns OVS_CORE_UNSPEC |
210 | * if 'numa_id' is invalid. */ | |
7c5a3bbf | 211 | int |
012c0a04 | 212 | ovs_numa_get_n_cores_on_numa(int numa_id) |
7c5a3bbf | 213 | { |
421aa227 | 214 | if (ovs_numa_numa_id_is_valid(numa_id)) { |
012c0a04 | 215 | struct numa_node *numa; |
7c5a3bbf | 216 | |
012c0a04 AW |
217 | numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes, |
218 | hash_int(numa_id, 0)), | |
219 | struct numa_node, hmap_node); | |
7c5a3bbf | 220 | |
012c0a04 | 221 | return list_size(&numa->cores); |
7c5a3bbf AW |
222 | } |
223 | ||
224 | return OVS_CORE_UNSPEC; | |
225 | } | |
226 | ||
8db2f898 AW |
227 | /* Returns the number of cpu cores that are available and unpinned |
228 | * on numa node. Returns OVS_CORE_UNSPEC if 'numa_id' is invalid. */ | |
7c5a3bbf | 229 | int |
012c0a04 | 230 | ovs_numa_get_n_unpinned_cores_on_numa(int numa_id) |
7c5a3bbf | 231 | { |
421aa227 | 232 | if (ovs_numa_numa_id_is_valid(numa_id)) { |
012c0a04 | 233 | struct numa_node *numa; |
7c5a3bbf AW |
234 | struct cpu_core *core; |
235 | int count = 0; | |
236 | ||
012c0a04 AW |
237 | numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes, |
238 | hash_int(numa_id, 0)), | |
239 | struct numa_node, hmap_node); | |
240 | LIST_FOR_EACH(core, list_node, &numa->cores) { | |
8db2f898 | 241 | if (core->available && !core->pinned) { |
7c5a3bbf AW |
242 | count++; |
243 | } | |
244 | } | |
245 | ||
246 | return count; | |
247 | } | |
248 | ||
249 | return OVS_CORE_UNSPEC; | |
250 | } | |
251 | ||
252 | /* Given 'core_id', tries to pin that core. Returns true, if succeeds. | |
8db2f898 AW |
253 | * False, if the core has already been pinned, or if it is invalid or |
254 | * not available. */ | |
7c5a3bbf AW |
255 | bool |
256 | ovs_numa_try_pin_core_specific(int core_id) | |
257 | { | |
421aa227 AW |
258 | if (ovs_numa_core_id_is_valid(core_id)) { |
259 | struct cpu_core *core; | |
7c5a3bbf | 260 | |
421aa227 AW |
261 | core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, |
262 | hash_int(core_id, 0)), | |
263 | struct cpu_core, hmap_node); | |
8db2f898 | 264 | if (core->available && !core->pinned) { |
421aa227 AW |
265 | core->pinned = true; |
266 | return true; | |
267 | } | |
7c5a3bbf AW |
268 | } |
269 | ||
270 | return false; | |
271 | } | |
272 | ||
8db2f898 AW |
273 | /* Searches through all cores for an unpinned and available core. Returns |
274 | * the 'core_id' if found and sets the 'core->pinned' to true. Otherwise, | |
275 | * returns OVS_CORE_UNSPEC. */ | |
7c5a3bbf AW |
276 | int |
277 | ovs_numa_get_unpinned_core_any(void) | |
278 | { | |
279 | struct cpu_core *core; | |
280 | ||
281 | HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) { | |
8db2f898 | 282 | if (core->available && !core->pinned) { |
7c5a3bbf AW |
283 | core->pinned = true; |
284 | return core->core_id; | |
285 | } | |
286 | } | |
287 | ||
288 | return OVS_CORE_UNSPEC; | |
289 | } | |
290 | ||
8db2f898 AW |
291 | /* Searches through all cores on numa node with 'numa_id' for an |
292 | * unpinned and available core. Returns the core_id if found and | |
293 | * sets the 'core->pinned' to true. Otherwise, returns OVS_CORE_UNSPEC. */ | |
7c5a3bbf | 294 | int |
012c0a04 | 295 | ovs_numa_get_unpinned_core_on_numa(int numa_id) |
7c5a3bbf | 296 | { |
421aa227 AW |
297 | if (ovs_numa_numa_id_is_valid(numa_id)) { |
298 | struct numa_node *numa; | |
299 | struct cpu_core *core; | |
7c5a3bbf | 300 | |
421aa227 AW |
301 | numa = CONTAINER_OF(hmap_first_with_hash(&all_numa_nodes, |
302 | hash_int(numa_id, 0)), | |
303 | struct numa_node, hmap_node); | |
304 | LIST_FOR_EACH(core, list_node, &numa->cores) { | |
8db2f898 | 305 | if (core->available && !core->pinned) { |
421aa227 AW |
306 | core->pinned = true; |
307 | return core->core_id; | |
308 | } | |
7c5a3bbf AW |
309 | } |
310 | } | |
311 | ||
312 | return OVS_CORE_UNSPEC; | |
313 | } | |
314 | ||
8db2f898 | 315 | /* Unpins the core with 'core_id'. */ |
7c5a3bbf AW |
316 | void |
317 | ovs_numa_unpin_core(int core_id) | |
318 | { | |
421aa227 AW |
319 | if (ovs_numa_core_id_is_valid(core_id)) { |
320 | struct cpu_core *core; | |
7c5a3bbf | 321 | |
421aa227 AW |
322 | core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, |
323 | hash_int(core_id, 0)), | |
324 | struct cpu_core, hmap_node); | |
325 | core->pinned = false; | |
326 | } | |
7c5a3bbf AW |
327 | } |
328 | ||
8db2f898 AW |
329 | /* Reads the cpu mask configuration from 'cmask' and sets the |
330 | * 'available' of corresponding cores. For unspecified cores, | |
331 | * sets 'available' to false. */ | |
332 | void | |
333 | ovs_numa_set_cpu_mask(const char *cmask) | |
334 | { | |
335 | int core_id = 0; | |
336 | int i; | |
337 | ||
338 | if (!found_numa_and_core) { | |
339 | return; | |
340 | } | |
341 | ||
342 | /* If no mask specified, resets the 'available' to true for all cores. */ | |
343 | if (!cmask) { | |
344 | struct cpu_core *core; | |
345 | ||
346 | HMAP_FOR_EACH(core, hmap_node, &all_cpu_cores) { | |
347 | core->available = true; | |
348 | } | |
349 | ||
350 | return; | |
351 | } | |
352 | ||
353 | for (i = strlen(cmask) - 1; i >= 0; i--) { | |
354 | char hex = toupper(cmask[i]); | |
355 | int bin, j; | |
356 | ||
357 | if (hex >= '0' && hex <= '9') { | |
358 | bin = hex - '0'; | |
359 | } else if (hex >= 'A' && hex <= 'F') { | |
360 | bin = hex - 'A' + 10; | |
361 | } else { | |
362 | bin = 0; | |
363 | VLOG_WARN("Invalid cpu mask: %c", cmask[i]); | |
364 | } | |
365 | ||
366 | for (j = 0; j < 4; j++) { | |
367 | struct cpu_core *core; | |
368 | ||
369 | core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, | |
370 | hash_int(core_id++, 0)), | |
371 | struct cpu_core, hmap_node); | |
372 | core->available = (bin >> j) & 0x1; | |
373 | ||
374 | if (core_id >= hmap_count(&all_cpu_cores)) { | |
375 | return; | |
376 | } | |
377 | } | |
378 | } | |
379 | ||
380 | /* For unspecified cores, sets 'available' to false. */ | |
381 | while (core_id < hmap_count(&all_cpu_cores)) { | |
382 | struct cpu_core *core; | |
383 | ||
384 | core = CONTAINER_OF(hmap_first_with_hash(&all_cpu_cores, | |
385 | hash_int(core_id++, 0)), | |
386 | struct cpu_core, hmap_node); | |
387 | core->available = false; | |
388 | } | |
389 | } | |
390 | ||
7c5a3bbf | 391 | #endif /* __linux__ */ |