]> git.proxmox.com Git - mirror_lxcfs.git/blob - src/sysfs_fuse.c
sys: virtualize /sys/devices/system/cpu/cpu*
[mirror_lxcfs.git] / src / sysfs_fuse.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6
7 #include "config.h"
8
9 #ifdef HAVE_FUSE3
10 #ifndef FUSE_USE_VERSION
11 #define FUSE_USE_VERSION 30
12 #endif
13 #else
14 #ifndef FUSE_USE_VERSION
15 #define FUSE_USE_VERSION 26
16 #endif
17 #endif
18
/*
 * Bitmap sizing helpers, adapted from the kernel sources.
 *
 * NBITS is the width of one bitmap word, DIV_ROUND_UP() is an integer
 * division that rounds up, and BITS_TO_LONGS() is the number of NBITS-wide
 * words needed to hold nr bits.
 */
#define NBITS 32 /* bits in uint32_t */
#define DIV_ROUND_UP(n, d) ((((n) + (d)) - 1) / (d))
#define BITS_TO_LONGS(nr) DIV_ROUND_UP((nr), NBITS)
23
24 #define _FILE_OFFSET_BITS 64
25
26 #define __STDC_FORMAT_MACROS
27 #include <dirent.h>
28 #include <errno.h>
29 #include <fcntl.h>
30 #include <fuse.h>
31 #include <inttypes.h>
32 #include <libgen.h>
33 #include <pthread.h>
34 #include <sched.h>
35 #include <stdbool.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <time.h>
41 #include <unistd.h>
42 #include <wait.h>
43 #include <linux/magic.h>
44 #include <linux/sched.h>
45 #include <sys/epoll.h>
46 #include <sys/mman.h>
47 #include <sys/mount.h>
48 #include <sys/param.h>
49 #include <sys/socket.h>
50 #include <sys/syscall.h>
51 #include <sys/sysinfo.h>
52 #include <sys/vfs.h>
53
54 #include "bindings.h"
55 #include "memory_utils.h"
56 #include "cgroups/cgroup.h"
57 #include "lxcfs_fuse_compat.h"
58 #include "sysfs_fuse.h"
59 #include "utils.h"
60
/*
 * Return the last CPU id named in a cpulist string such as "0,2-3" or "0-7".
 *
 * Only the final number is parsed, i.e. whatever follows the last ',' or '-'
 * in the string (or the whole string when it contains neither), so the list
 * is expected to be in ascending order.
 *
 * Returns the parsed value, or -1 when strtoul() reports an error via errno
 * (e.g. the value is out of range).
 */
static ssize_t get_max_cpus(char *cpulist)
{
	char *comma, *dash, *last;
	size_t cpus;

	comma = strrchr(cpulist, ',');
	dash = strrchr(cpulist, '-');

	/*
	 * Pick the separator that occurs last. The NULL cases are handled
	 * explicitly because ordering a null pointer against a valid one with
	 * '<' or '>' is undefined.
	 */
	if (!comma && !dash)
		last = cpulist;
	else if (!comma)
		last = dash + 1;
	else if (!dash)
		last = comma + 1;
	else
		last = (comma > dash ? comma : dash) + 1;

	errno = 0;
	cpus = strtoul(last, NULL, 0);
	if (errno != 0)
		return -1;

	return (ssize_t)cpus;
}
91
92 static void set_bit(unsigned bit, uint32_t *bitarr)
93 {
94 bitarr[bit / NBITS] |= (1 << (bit % NBITS));
95 }
96
97 static bool is_set(unsigned bit, uint32_t *bitarr)
98 {
99 return (bitarr[bit / NBITS] & (1 << (bit % NBITS))) != 0;
100 }
101
102 /* Create cpumask from cpulist aka turn:
103 *
104 * 0,2-3
105 *
106 * into bit array
107 *
108 * 1 0 1 1
109 */
110 static uint32_t *lxc_cpumask(char *buf, size_t nbits)
111 {
112 __do_free uint32_t *bitarr = NULL;
113 char *token;
114 size_t arrlen;
115
116 arrlen = BITS_TO_LONGS(nbits);
117 bitarr = calloc(arrlen, sizeof(uint32_t));
118 if (!bitarr)
119 return ret_set_errno(NULL, ENOMEM);
120
121 lxc_iterate_parts(token, buf, ",") {
122 errno = 0;
123 unsigned end, start;
124 char *range;
125
126 start = strtoul(token, NULL, 0);
127 end = start;
128 range = strchr(token, '-');
129 if (range)
130 end = strtoul(range + 1, NULL, 0);
131
132 if (!(start <= end))
133 return ret_set_errno(NULL, EINVAL);
134
135 if (end >= nbits)
136 return ret_set_errno(NULL, EINVAL);
137
138 while (start <= end)
139 set_bit(start++, bitarr);
140 }
141
142 return move_ptr(bitarr);
143 }
144
145 static int sys_devices_system_cpu_online_read(char *buf, size_t size,
146 off_t offset,
147 struct fuse_file_info *fi)
148 {
149 __do_free char *cg = NULL, *cpuset = NULL;
150 struct fuse_context *fc = fuse_get_context();
151 struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data;
152 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
153 char *cache = d->buf;
154 bool use_view;
155
156 int max_cpus = 0;
157 pid_t initpid;
158 ssize_t total_len = 0;
159
160 if (offset) {
161 int left;
162
163 if (!d->cached)
164 return 0;
165
166 if (offset > d->size)
167 return -EINVAL;
168
169 left = d->size - offset;
170 total_len = left > size ? size : left;
171 memcpy(buf, cache + offset, total_len);
172
173 return total_len;
174 }
175
176 initpid = lookup_initpid_in_store(fc->pid);
177 if (initpid <= 1 || is_shared_pidns(initpid))
178 initpid = fc->pid;
179
180 cg = get_pid_cgroup(initpid, "cpuset");
181 if (!cg)
182 return read_file_fuse("/sys/devices/system/cpu/online", buf, size, d);
183 prune_init_slice(cg);
184
185 cpuset = get_cpuset(cg);
186 if (!cpuset)
187 return 0;
188
189 if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs)
190 use_view = true;
191 else
192 use_view = false;
193
194 if (use_view)
195 max_cpus = max_cpu_count(cg);
196
197 if (use_view) {
198 if (max_cpus > 1)
199 total_len = snprintf(d->buf, d->buflen, "0-%d\n", max_cpus - 1);
200 else
201 total_len = snprintf(d->buf, d->buflen, "0\n");
202 } else {
203 total_len = snprintf(d->buf, d->buflen, "%s\n", cpuset);
204 }
205 if (total_len < 0 || total_len >= d->buflen)
206 return log_error(0, "Failed to write to cache");
207
208 d->size = (int)total_len;
209 d->cached = 1;
210
211 if (total_len > size)
212 total_len = size;
213
214 memcpy(buf, d->buf, total_len);
215
216 return total_len;
217 }
218
219 static int filler_sys_devices_system_cpu(const char *path, void *buf,
220 fuse_fill_dir_t filler)
221 {
222 __do_free uint32_t *cpumask = NULL;
223 __do_free char *cg = NULL, *cpuset = NULL;
224 __do_closedir DIR *dir = NULL;
225 struct dirent *dirent;
226 struct fuse_context *fc = fuse_get_context();
227 pid_t initpid;
228 ssize_t max_cpus;
229 size_t len;
230 char cpu[100];
231
232 initpid = lookup_initpid_in_store(fc->pid);
233 if (initpid <= 1 || is_shared_pidns(initpid))
234 initpid = fc->pid;
235
236 cg = get_pid_cgroup(initpid, "cpuset");
237 if (!cg)
238 return 0;
239 prune_init_slice(cg);
240
241 cpuset = get_cpuset(cg);
242 if (!cpuset)
243 return 0;
244
245 max_cpus = get_max_cpus(cpuset);
246 if (max_cpus < 0 || max_cpus >= (INT_MAX - 1))
247 return -1;
248 max_cpus++;
249
250 cpumask = lxc_cpumask(cpuset, max_cpus);
251 if (!cpumask)
252 return -errno;
253
254 for (size_t i = 0; i < max_cpus; i++) {
255 int ret;
256
257 if (!is_set(i, cpumask))
258 continue;
259
260 ret = snprintf(cpu, sizeof(cpu), "cpu%ld", i);
261 if (ret < 0 || ret >= sizeof(cpu))
262 continue;
263
264 if (DIR_FILLER(filler, buf, cpu, NULL, 0) != 0)
265 return -ENOENT;
266 }
267
268 dir = opendir(path);
269 if (!dir)
270 return -ENOENT;
271
272 while ((dirent = readdir(dir))) {
273 len = strlen(dirent->d_name);
274 if (strncmp(dirent->d_name, "cpu", 3) == 0 &&
275 dirent->d_name[len - 1] >= '0' &&
276 dirent->d_name[len - 1] <= '9')
277 continue;
278
279 if (DIR_FILLER(filler, buf, dirent->d_name, NULL, 0) != 0)
280 return -ENOENT;
281 }
282
283 return 0;
284 }
285
/*
 * Return the st_mode that lstat() reports for @path (symlinks are not
 * followed).
 *
 * Returns 0 when lstat() fails. A mode of 0 satisfies none of the S_IS*()
 * file type tests, so callers that test the type treat a failed lookup as
 * "no such file". A negative errno cannot be returned here because mode_t
 * is an unsigned type on Linux.
 */
static mode_t get_st_mode(const char *path)
{
	struct stat sb;

	if (lstat(path, &sb) < 0)
		return 0;

	return sb.st_mode;
}
297
298 static off_t get_sysfile_size(const char *which)
299 {
300 __do_fclose FILE *f = NULL;
301 __do_free char *line = NULL;
302 size_t len = 0;
303 ssize_t sz, answer = 0;
304
305 f = fopen(which, "re");
306 if (!f)
307 return 0;
308
309 while ((sz = getline(&line, &len, f)) != -1)
310 answer += sz;
311
312 return answer;
313 }
314
315 __lxcfs_fuse_ops int sys_getattr(const char *path, struct stat *sb)
316 {
317 struct timespec now;
318 mode_t st_mode;
319
320 if (!liblxcfs_functional())
321 return -EIO;
322
323 memset(sb, 0, sizeof(struct stat));
324 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
325 return -EINVAL;
326
327 sb->st_uid = sb->st_gid = 0;
328 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
329
330 st_mode = get_st_mode(path);
331 if (st_mode < 0)
332 return -ENOENT;
333
334 if (S_ISDIR(st_mode)) {
335 sb->st_mode = st_mode;
336 sb->st_nlink = 2;
337 return 0;
338 }
339
340 if (S_ISREG(st_mode) || S_ISLNK(st_mode)) {
341 sb->st_size = get_sysfile_size(path);
342 sb->st_mode = st_mode;
343 sb->st_nlink = 1;
344 return 0;
345 }
346
347 return -ENOENT;
348 }
349
350 __lxcfs_fuse_ops int sys_write(const char *path, const char *buf,
351 size_t size, off_t offset,
352 struct fuse_file_info *fi)
353 {
354 __do_close int fd = -EBADF;
355 struct file_info *f = INTTYPE_TO_PTR(fi->fh);
356
357 if (!liblxcfs_functional())
358 return -EIO;
359
360 if (f->type != LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE)
361 return -EINVAL;
362
363 fd = open(path, O_WRONLY | O_CLOEXEC);
364 if (fd == -1)
365 return -errno;
366
367 return pwrite(fd, buf, size, offset);
368 }
369
370 __lxcfs_fuse_ops int sys_readdir(const char *path, void *buf,
371 fuse_fill_dir_t filler, off_t offset,
372 struct fuse_file_info *fi)
373 {
374 __do_closedir DIR *dir = NULL;
375 struct dirent *dirent;
376 struct file_info *f = INTTYPE_TO_PTR(fi->fh);
377
378 if (!liblxcfs_functional())
379 return -EIO;
380
381 switch (f->type) {
382 case LXC_TYPE_SYS: {
383 if (DIR_FILLER(filler, buf, ".", NULL, 0) != 0 ||
384 DIR_FILLER(filler, buf, "..", NULL, 0) != 0 ||
385 DIR_FILLER(filler, buf, "devices", NULL, 0) != 0)
386 return -ENOENT;
387
388 return 0;
389 }
390 case LXC_TYPE_SYS_DEVICES: {
391 if (DIR_FILLER(filler, buf, ".", NULL, 0) != 0 ||
392 DIR_FILLER(filler, buf, "..", NULL, 0) != 0 ||
393 DIR_FILLER(filler, buf, "system", NULL, 0) != 0)
394 return -ENOENT;
395
396 return 0;
397 }
398 case LXC_TYPE_SYS_DEVICES_SYSTEM: {
399 if (DIR_FILLER(filler, buf, ".", NULL, 0) != 0 ||
400 DIR_FILLER(filler, buf, "..", NULL, 0) != 0 ||
401 DIR_FILLER(filler, buf, "cpu", NULL, 0) != 0)
402 return -ENOENT;
403
404 return 0;
405 }
406 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU:
407 return filler_sys_devices_system_cpu(path, buf, filler);
408 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR: {
409 dir = opendir(path);
410 if (!dir)
411 return -ENOENT;
412
413 while ((dirent = readdir(dir))) {
414 if (DIR_FILLER(filler, buf, dirent->d_name, NULL, 0) != 0)
415 return -ENOENT;
416 }
417
418 return 0;
419 }
420 }
421
422 return -EINVAL;
423 }
424
425 __lxcfs_fuse_ops int sys_readlink(const char *path, char *buf, size_t size)
426 {
427 int ret = readlink(path, buf, size);
428
429 if (!liblxcfs_functional())
430 return -EIO;
431
432 if (ret < 0)
433 return -errno;
434 if (ret > size)
435 return -1;
436
437 buf[ret] = '\0';
438
439 return 0;
440 }
441 __lxcfs_fuse_ops int sys_open(const char *path, struct fuse_file_info *fi)
442 {
443 __do_free struct file_info *info = NULL;
444 int type = -1;
445
446 if (!liblxcfs_functional())
447 return -EIO;
448
449 if (strcmp(path, "/sys/devices/system/cpu/online") == 0)
450 type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE;
451 else if (strncmp(path, "/sys/devices/system/cpu/",
452 STRLITERALLEN("/sys/devices/system/cpu/")) == 0 &&
453 S_ISREG(get_st_mode(path)))
454 type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE;
455 if (type == -1)
456 return -ENOENT;
457
458 info = malloc(sizeof(*info));
459 if (!info)
460 return -ENOMEM;
461
462 memset(info, 0, sizeof(*info));
463 info->type = type;
464
465 info->buflen = get_sysfile_size(path) + BUF_RESERVE_SIZE;
466
467 info->buf = malloc(info->buflen);
468 if (!info->buf)
469 return -ENOMEM;
470
471 memset(info->buf, 0, info->buflen);
472 /* set actual size to buffer size */
473 info->size = info->buflen;
474
475 fi->fh = PTR_TO_UINT64(move_ptr(info));
476 return 0;
477 }
478
479 __lxcfs_fuse_ops int sys_opendir(const char *path, struct fuse_file_info *fi)
480 {
481 __do_free struct file_info *dir_info = NULL;
482 int type = -1;
483
484 if (!liblxcfs_functional())
485 return -EIO;
486
487 if (strcmp(path, "/sys") == 0)
488 type = LXC_TYPE_SYS;
489 if (strcmp(path, "/sys/devices") == 0)
490 type = LXC_TYPE_SYS_DEVICES;
491 if (strcmp(path, "/sys/devices/system") == 0)
492 type = LXC_TYPE_SYS_DEVICES_SYSTEM;
493 if (strcmp(path, "/sys/devices/system/cpu") == 0)
494 type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU;
495 if (strncmp(path, "/sys/devices/system/cpu/",
496 STRLITERALLEN("/sys/devices/system/cpu/")) == 0 &&
497 S_ISDIR(get_st_mode(path)))
498 type = LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR;
499 if (type == -1)
500 return -ENOENT;
501
502 dir_info = malloc(sizeof(*dir_info));
503 if (!dir_info)
504 return -ENOMEM;
505
506 memset(dir_info, 0, sizeof(*dir_info));
507 dir_info->type = type;
508 dir_info->buf = NULL;
509 dir_info->file = NULL;
510 dir_info->buflen = 0;
511
512 fi->fh = PTR_TO_UINT64(move_ptr(dir_info));
513 return 0;
514 }
515
516 __lxcfs_fuse_ops int sys_access(const char *path, int mask)
517 {
518 if (!liblxcfs_functional())
519 return -EIO;
520
521 return access(path, mask);
522 }
523
524 __lxcfs_fuse_ops int sys_release(const char *path, struct fuse_file_info *fi)
525 {
526 do_release_file_info(fi);
527 return 0;
528 }
529
530 __lxcfs_fuse_ops int sys_releasedir(const char *path, struct fuse_file_info *fi)
531 {
532 do_release_file_info(fi);
533 return 0;
534 }
535
536 __lxcfs_fuse_ops int sys_read(const char *path, char *buf, size_t size,
537 off_t offset, struct fuse_file_info *fi)
538 {
539 struct file_info *f = INTTYPE_TO_PTR(fi->fh);
540
541 if (!liblxcfs_functional())
542 return -EIO;
543
544 switch (f->type) {
545 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE:
546 return sys_devices_system_cpu_online_read(buf, size, offset, fi);
547 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE:
548 return read_file_fuse_with_offset(path, buf, size, offset, f);
549 }
550
551 return -EINVAL;
552 }