]>
git.proxmox.com Git - ceph.git/blob - ceph/src/common/numa.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
10 #include "include/stringify.h"
11 #include "common/safe_io.h"
13 using namespace std::literals
;
19 #if defined(__linux__)
20 int parse_cpu_set_list(const char *s
,
27 int a
= strtol(s
, &end
, 10);
33 int b
= strtol(s
, &end
, 10);
43 *cpu_set_size
= a
+ 1;
56 std::string
cpu_set_to_str_list(size_t cpu_set_size
,
57 const cpu_set_t
*cpu_set
)
62 while (a
< cpu_set_size
&& !CPU_ISSET(a
, cpu_set
)) {
65 if (a
>= cpu_set_size
) {
69 while (b
< cpu_set_size
&& CPU_ISSET(b
, cpu_set
)) {
76 r
+= stringify(a
) + "-" + stringify(b
- 1);
85 std::set
<int> cpu_set_to_set(size_t cpu_set_size
,
86 const cpu_set_t
*cpu_set
)
91 while (a
< cpu_set_size
&& !CPU_ISSET(a
, cpu_set
)) {
94 if (a
>= cpu_set_size
) {
98 while (b
< cpu_set_size
&& CPU_ISSET(b
, cpu_set
)) {
110 int get_numa_node_cpu_set(
112 size_t *cpu_set_size
,
115 std::string fn
= "/sys/devices/system/node/node";
116 fn
+= stringify(node
);
118 int fd
= ::open(fn
.c_str(), O_RDONLY
);
123 int r
= safe_read(fd
, &buf
, sizeof(buf
));
128 while (r
> 0 && ::isspace(buf
[--r
])) {
131 r
= parse_cpu_set_list(buf
, cpu_set_size
, cpu_set
);
141 static int easy_readdir(const std::string
& dir
, std::set
<std::string
> *out
)
143 DIR *h
= ::opendir(dir
.c_str());
147 struct dirent
*de
= nullptr;
148 while ((de
= ::readdir(h
))) {
149 if (strcmp(de
->d_name
, ".") == 0 ||
150 strcmp(de
->d_name
, "..") == 0) {
153 out
->insert(de
->d_name
);
159 static std::string
get_task_comm(pid_t tid
)
161 static const char* comm_fmt
= "/proc/self/task/%d/comm";
162 char comm_name
[strlen(comm_fmt
) + 8];
163 snprintf(comm_name
, sizeof(comm_name
), comm_fmt
, tid
);
164 int fd
= open(comm_name
, O_CLOEXEC
| O_RDONLY
);
169 static constexpr int TASK_COMM_LEN
= 16;
170 char name
[TASK_COMM_LEN
];
171 ssize_t n
= safe_read(fd
, name
, sizeof(name
));
176 assert(n
<= sizeof(name
));
177 if (name
[n
- 1] == '\n') {
185 int set_cpu_affinity_all_threads(size_t cpu_set_size
, cpu_set_t
*cpu_set
)
187 // first set my affinity
188 int r
= sched_setaffinity(getpid(), cpu_set_size
, cpu_set
);
193 // make 2 passes here so that we (hopefully) catch racing threads creating
195 for (unsigned pass
= 0; pass
< 2; ++pass
) {
196 // enumerate all child threads from /proc
197 std::set
<std::string
> ls
;
198 std::string path
= "/proc/"s
+ stringify(getpid()) + "/task";
199 r
= easy_readdir(path
, &ls
);
204 pid_t tid
= atoll(i
.c_str());
209 std::string thread_name
= get_task_comm(tid
);
210 static const char *dpdk_worker_name
= "lcore-worker";
211 if (!thread_name
.compare(0, strlen(dpdk_worker_name
), dpdk_worker_name
)) {
212 // ignore dpdk reactor thread, as it takes case of numa by itself
216 r
= sched_setaffinity(tid
, cpu_set_size
, cpu_set
);
226 int parse_cpu_set_list(const char *s
,
227 size_t *cpu_set_size
,
233 std::string
cpu_set_to_str_list(size_t cpu_set_size
,
234 const cpu_set_t
*cpu_set
)
239 std::set
<int> cpu_set_to_set(size_t cpu_set_size
,
240 const cpu_set_t
*cpu_set
)
245 int get_numa_node_cpu_set(int node
,
246 size_t *cpu_set_size
,
252 int set_cpu_affinity_all_threads(size_t cpu_set_size
,