]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/numa.cc
import ceph quincy 17.2.6
[ceph.git] / ceph / src / common / numa.cc
CommitLineData
11fdf7f2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#include "numa.h"
5
6#include <cstring>
7#include <errno.h>
8#include <iostream>
9
10#include "include/stringify.h"
11#include "common/safe_io.h"
12
f67539c2
TL
13using namespace std::literals;
14
15using std::set;
16
11fdf7f2
TL
17
18// list
19#if defined(__linux__)
/**
 * Parse a CPU list such as "0-3,8,10-11" into a cpu_set_t.
 *
 * The list is a comma-separated sequence of decimal CPU ids and inclusive
 * "first-last" ranges.
 *
 * @param s             NUL-terminated list to parse
 * @param cpu_set_size  out: one past the highest CPU id named anywhere in
 *                      the list, regardless of the order the entries appear
 *                      in; 0 when the list is empty
 * @param cpu_set       out: cleared first, then populated with every listed
 *                      id that is below CPU_SETSIZE
 * @return 0 on success; -EINVAL when a number is missing, an id is negative,
 *         a range runs backwards, or an unexpected separator is found
 */
int parse_cpu_set_list(const char *s,
                       size_t *cpu_set_size,
                       cpu_set_t *cpu_set)
{
  CPU_ZERO(cpu_set);
  // give the caller a well-defined size even when nothing is listed
  *cpu_set_size = 0;
  while (*s) {
    char *end;
    long first = strtol(s, &end, 10);
    if (end == s || first < 0) {
      return -EINVAL;
    }
    long last = first;
    if (*end == '-') {
      s = end + 1;
      last = strtol(s, &end, 10);
      if (end == s || last < first) {
        return -EINVAL;
      }
    }
    // a cpu_set_t only has room for CPU_SETSIZE ids, so stop there instead
    // of walking an arbitrarily large range
    for (long cpu = first; cpu <= last && cpu < CPU_SETSIZE; ++cpu) {
      CPU_SET(cpu, cpu_set);
    }
    // track the highest id seen so far; entries need not be ascending
    const size_t upper = static_cast<size_t>(last) + 1;
    if (upper > *cpu_set_size) {
      *cpu_set_size = upper;
    }
    if (*end == 0) {
      break;
    }
    if (*end != ',') {
      return -EINVAL;
    }
    s = end + 1;
  }
  return 0;
}
55
56std::string cpu_set_to_str_list(size_t cpu_set_size,
57 const cpu_set_t *cpu_set)
58{
59 std::string r;
60 unsigned a = 0;
61 while (true) {
62 while (a < cpu_set_size && !CPU_ISSET(a, cpu_set)) {
63 ++a;
64 }
65 if (a >= cpu_set_size) {
66 break;
67 }
68 unsigned b = a + 1;
69 while (b < cpu_set_size && CPU_ISSET(b, cpu_set)) {
70 ++b;
71 }
72 if (r.size()) {
73 r += ",";
74 }
75 if (b > a + 1) {
76 r += stringify(a) + "-" + stringify(b - 1);
77 } else {
78 r += stringify(a);
79 }
80 a = b;
81 }
82 return r;
83}
84
/**
 * Collect the CPU ids present in a cpu_set_t into an ordered std::set.
 *
 * @param cpu_set_size  number of leading CPU ids to inspect
 * @param cpu_set       set to read
 * @return every id below cpu_set_size that is set in cpu_set
 */
std::set<int> cpu_set_to_set(size_t cpu_set_size,
                             const cpu_set_t *cpu_set)
{
  std::set<int> members;
  for (unsigned cpu = 0; cpu < cpu_set_size; ++cpu) {
    if (CPU_ISSET(cpu, cpu_set)) {
      members.insert(cpu);
    }
  }
  return members;
}
108
109
110int get_numa_node_cpu_set(
111 int node,
112 size_t *cpu_set_size,
113 cpu_set_t *cpu_set)
114{
115 std::string fn = "/sys/devices/system/node/node";
116 fn += stringify(node);
117 fn += "/cpulist";
118 int fd = ::open(fn.c_str(), O_RDONLY);
119 if (fd < 0) {
120 return -errno;
121 }
122 char buf[1024];
123 int r = safe_read(fd, &buf, sizeof(buf));
124 if (r < 0) {
125 goto out;
126 }
127 buf[r] = 0;
128 while (r > 0 && ::isspace(buf[--r])) {
129 buf[r] = 0;
130 }
131 r = parse_cpu_set_list(buf, cpu_set_size, cpu_set);
132 if (r < 0) {
133 goto out;
134 }
135 r = 0;
136 out:
137 ::close(fd);
138 return r;
139}
140
92f5a8d4
TL
/**
 * List the entries of a directory, leaving out "." and "..".
 *
 * @param dir  path of the directory to read
 * @param out  set that receives one string per entry name; existing
 *             contents are kept
 * @return 0 on success, -errno if the directory cannot be opened
 */
static int easy_readdir(const std::string& dir, std::set<std::string> *out)
{
  DIR *handle = ::opendir(dir.c_str());
  if (handle == nullptr) {
    return -errno;
  }
  for (struct dirent *entry = ::readdir(handle);
       entry != nullptr;
       entry = ::readdir(handle)) {
    const char *entry_name = entry->d_name;
    const bool is_self = (strcmp(entry_name, ".") == 0);
    const bool is_parent = (strcmp(entry_name, "..") == 0);
    if (!is_self && !is_parent) {
      out->insert(entry_name);
    }
  }
  closedir(handle);
  return 0;
}
158
20effc67
TL
159static std::string get_task_comm(pid_t tid)
160{
161 static const char* comm_fmt = "/proc/self/task/%d/comm";
162 char comm_name[strlen(comm_fmt) + 8];
163 snprintf(comm_name, sizeof(comm_name), comm_fmt, tid);
164 int fd = open(comm_name, O_CLOEXEC | O_RDONLY);
165 if (fd == -1) {
166 return "";
167 }
168 // see linux/sched.h
169 static constexpr int TASK_COMM_LEN = 16;
170 char name[TASK_COMM_LEN];
171 ssize_t n = safe_read(fd, name, sizeof(name));
172 close(fd);
173 if (n < 0) {
174 return "";
175 }
176 assert(n <= sizeof(name));
177 if (name[n - 1] == '\n') {
178 name[n - 1] = '\0';
179 } else {
180 name[n] = '\0';
181 }
182 return name;
183}
184
92f5a8d4
TL
185int set_cpu_affinity_all_threads(size_t cpu_set_size, cpu_set_t *cpu_set)
186{
187 // first set my affinity
188 int r = sched_setaffinity(getpid(), cpu_set_size, cpu_set);
189 if (r < 0) {
190 return -errno;
191 }
192
193 // make 2 passes here so that we (hopefully) catch racing threads creating
194 // threads.
195 for (unsigned pass = 0; pass < 2; ++pass) {
196 // enumerate all child threads from /proc
197 std::set<std::string> ls;
198 std::string path = "/proc/"s + stringify(getpid()) + "/task";
199 r = easy_readdir(path, &ls);
200 if (r < 0) {
201 return r;
202 }
203 for (auto& i : ls) {
204 pid_t tid = atoll(i.c_str());
205 if (!tid) {
206 continue; // wtf
207 }
20effc67
TL
208 #ifdef HAVE_DPDK
209 std::string thread_name = get_task_comm(tid);
210 static const char *dpdk_worker_name = "lcore-worker";
211 if (!thread_name.compare(0, strlen(dpdk_worker_name), dpdk_worker_name)) {
212 // ignore dpdk reactor thread, as it takes case of numa by itself
213 continue;
214 }
215 #endif
92f5a8d4
TL
216 r = sched_setaffinity(tid, cpu_set_size, cpu_set);
217 if (r < 0) {
218 return -errno;
219 }
220 }
221 }
222 return 0;
223}
224
f67539c2 225#else
11fdf7f2
TL
// Stub for platforms other than Linux: no list is parsed and neither output
// argument is written.  Always returns -ENOTSUP.
int parse_cpu_set_list(const char *s,
                       size_t *cpu_set_size,
                       cpu_set_t *cpu_set)
{
  constexpr int not_supported = -ENOTSUP;
  return not_supported;
}
232
// Stub for platforms other than Linux: the arguments are not inspected and
// the result is always an empty string.
std::string cpu_set_to_str_list(size_t cpu_set_size,
                                const cpu_set_t *cpu_set)
{
  return std::string();
}
238
// Stub for platforms other than Linux: the arguments are not inspected and
// the result is always an empty set.
std::set<int> cpu_set_to_set(size_t cpu_set_size,
                             const cpu_set_t *cpu_set)
{
  return std::set<int>();
}
244
// Stub for platforms other than Linux: nothing is looked up and neither
// output argument is written.  Always returns -ENOTSUP.
int get_numa_node_cpu_set(int node,
                          size_t *cpu_set_size,
                          cpu_set_t *cpu_set)
{
  constexpr int not_supported = -ENOTSUP;
  return not_supported;
}
251
92f5a8d4
TL
// Stub for platforms other than Linux: no affinity is changed.  Always
// returns -ENOTSUP.
int set_cpu_affinity_all_threads(size_t cpu_set_size,
                                 cpu_set_t *cpu_set)
{
  constexpr int not_supported = -ENOTSUP;
  return not_supported;
}
257
11fdf7f2 258#endif