1 /* SPDX-License-Identifier: LGPL-2.1+ */
21 #include <linux/magic.h>
22 #include <linux/sched.h>
23 #include <sys/epoll.h>
25 #include <sys/mount.h>
26 #include <sys/param.h>
27 #include <sys/socket.h>
28 #include <sys/syscall.h>
29 #include <sys/sysinfo.h>
32 #include "sysfs_fuse.h"
35 #include "memory_utils.h"
36 #include "cgroups/cgroup.h"
37 #include "lxcfs_fuse_compat.h"
40 static off_t
get_sysfile_size(const char *which
);
41 /* Create cpumask from cpulist aka turn:
49 static int lxc_cpumask(char *buf
, __u32
**bitarr
, __u32
*last_set_bit
)
51 __do_free __u32
*arr_u32
= NULL
;
52 __u32 cur_last_set_bit
= 0, nbits
= 256;
56 nr_u32
= BITS_TO_LONGS(nbits
);
57 arr_u32
= zalloc(nr_u32
* sizeof(__u32
));
59 return ret_errno(ENOMEM
);
61 lxc_iterate_parts(token
, buf
, ",") {
62 __u32 last_bit
, first_bit
;
66 first_bit
= strtoul(token
, NULL
, 0);
68 range
= strchr(token
, '-');
70 last_bit
= strtoul(range
+ 1, NULL
, 0);
72 if (!(first_bit
<= last_bit
))
73 return ret_errno(EINVAL
);
75 if (last_bit
>= nbits
) {
76 __u32 add_bits
= last_bit
- nbits
+ 32;
80 new_nr_u32
= BITS_TO_LONGS(nbits
+ add_bits
);
81 p
= realloc(arr_u32
, new_nr_u32
* sizeof(uint32_t));
83 return ret_errno(ENOMEM
);
84 arr_u32
= move_ptr(p
);
86 memset(arr_u32
+ nr_u32
, 0,
87 (new_nr_u32
- nr_u32
) * sizeof(uint32_t));
91 while (first_bit
<= last_bit
)
92 set_bit(first_bit
++, arr_u32
);
94 if (last_bit
> cur_last_set_bit
)
95 cur_last_set_bit
= last_bit
;
98 *last_set_bit
= cur_last_set_bit
;
99 *bitarr
= move_ptr(arr_u32
);
103 static int lxc_cpumask_update(char *buf
, __u32
*bitarr
, __u32 last_set_bit
,
106 bool flipped
= false;
109 lxc_iterate_parts(token
, buf
, ",") {
110 __u32 last_bit
, first_bit
;
114 first_bit
= strtoul(token
, NULL
, 0);
115 last_bit
= first_bit
;
116 range
= strchr(token
, '-');
118 last_bit
= strtoul(range
+ 1, NULL
, 0);
120 if (!(first_bit
<= last_bit
)) {
121 lxcfs_debug("The cup range seems to be inverted: %u-%u", first_bit
, last_bit
);
125 if (last_bit
> last_set_bit
)
128 while (first_bit
<= last_bit
) {
129 if (clear
&& is_set(first_bit
, bitarr
)) {
131 clear_bit(first_bit
, bitarr
);
132 } else if (!clear
&& !is_set(first_bit
, bitarr
)) {
134 set_bit(first_bit
, bitarr
);
147 #define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
148 #define __OFFLINE_CPUS "/sys/devices/system/cpu/offline"
149 static int cpumask(char *posscpus
, __u32
**bitarr
, __u32
*last_set_bit
)
151 __do_free
char *isolcpus
= NULL
, *offlinecpus
= NULL
;
152 __do_free __u32
*possmask
= NULL
;
154 __u32 poss_last_set_bit
= 0;
156 if (file_exists(__ISOL_CPUS
)) {
157 isolcpus
= read_file_at(-EBADF
, __ISOL_CPUS
, PROTECT_OPEN
);
161 if (!isdigit(isolcpus
[0]))
162 free_disarm(isolcpus
);
164 lxcfs_debug("The path \""__ISOL_CPUS
"\" to read isolated cpus from does not exist");
167 if (file_exists(__OFFLINE_CPUS
)) {
168 offlinecpus
= read_file_at(-EBADF
, __OFFLINE_CPUS
, PROTECT_OPEN
);
172 if (!isdigit(offlinecpus
[0]))
173 free_disarm(offlinecpus
);
175 lxcfs_debug("The path \""__OFFLINE_CPUS
"\" to read offline cpus from does not exist");
178 ret
= lxc_cpumask(posscpus
, &possmask
, &poss_last_set_bit
);
183 ret
= lxc_cpumask_update(isolcpus
, possmask
, poss_last_set_bit
, true);
186 ret
|= lxc_cpumask_update(offlinecpus
, possmask
, poss_last_set_bit
, true);
190 *bitarr
= move_ptr(possmask
);
191 *last_set_bit
= poss_last_set_bit
;
195 static int do_cpuset_read(char *cg
, char *buf
, size_t buflen
)
197 __do_free
char *cpuset
= NULL
;
198 struct fuse_context
*fc
= fuse_get_context();
199 struct lxcfs_opts
*opts
= (struct lxcfs_opts
*)fc
->private_data
;
201 ssize_t total_len
= 0;
204 cpuset
= get_cpuset(cg
);
208 if (cgroup_ops
->can_use_cpuview(cgroup_ops
) && opts
&& opts
->use_cfs
)
214 max_cpus
= max_cpu_count(cg
);
218 total_len
= snprintf(buf
, buflen
, "0-%d\n", max_cpus
- 1);
220 total_len
= snprintf(buf
, buflen
, "0\n");
222 total_len
= snprintf(buf
, buflen
, "%s\n", cpuset
);
224 if (total_len
< 0 || (size_t)total_len
>= buflen
)
225 return log_error(0, "Failed to write to cache");
230 static int sys_devices_system_cpu_online_read(char *buf
, size_t size
,
232 struct fuse_file_info
*fi
)
234 __do_free
char *cg
= NULL
;
235 struct fuse_context
*fc
= fuse_get_context();
236 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
237 char *cache
= d
->buf
;
239 ssize_t total_len
= 0;
247 if (offset
> d
->size
)
250 left
= d
->size
- offset
;
251 total_len
= left
> size
? size
: left
;
252 memcpy(buf
, cache
+ offset
, total_len
);
257 initpid
= lookup_initpid_in_store(fc
->pid
);
258 if (initpid
<= 1 || is_shared_pidns(initpid
))
261 cg
= get_pid_cgroup(initpid
, "cpuset");
263 return read_file_fuse("/sys/devices/system/cpu/online", buf
, size
, d
);
264 prune_init_slice(cg
);
266 total_len
= do_cpuset_read(cg
, d
->buf
, d
->buflen
);
268 d
->size
= (int)total_len
;
271 if ((size_t)total_len
> size
)
274 memcpy(buf
, d
->buf
, total_len
);
279 static int sys_devices_system_cpu_online_getsize(const char *path
)
281 __do_free
char *cg
= NULL
;
282 struct fuse_context
*fc
= fuse_get_context();
284 char buf
[BUF_RESERVE_SIZE
];
285 int buflen
= sizeof(buf
);
287 initpid
= lookup_initpid_in_store(fc
->pid
);
288 if (initpid
<= 1 || is_shared_pidns(initpid
))
291 cg
= get_pid_cgroup(initpid
, "cpuset");
293 return get_sysfile_size(path
);
294 prune_init_slice(cg
);
296 return do_cpuset_read(cg
, buf
, buflen
);
299 static int filler_sys_devices_system_cpu(const char *path
, void *buf
,
300 fuse_fill_dir_t filler
)
302 __do_free __u32
*bitarr
= NULL
;
303 __do_free
char *cg
= NULL
, *cpuset
= NULL
;
304 __do_closedir
DIR *dirp
= NULL
;
305 struct fuse_context
*fc
= fuse_get_context();
306 __u32 last_set_bit
= 0;
308 struct dirent
*dirent
;
311 initpid
= lookup_initpid_in_store(fc
->pid
);
312 if (initpid
<= 1 || is_shared_pidns(initpid
))
315 cg
= get_pid_cgroup(initpid
, "cpuset");
318 prune_init_slice(cg
);
320 cpuset
= get_cpuset(cg
);
324 ret
= cpumask(cpuset
, &bitarr
, &last_set_bit
);
328 dirp
= opendir(path
);
332 for (__u32 bit
= 0; bit
<= last_set_bit
; bit
++) {
335 if (!is_set(bit
, bitarr
))
338 ret
= snprintf(cpu
, sizeof(cpu
), "cpu%u", bit
);
339 if (ret
< 0 || (size_t)ret
>= sizeof(cpu
))
342 if (dir_fillerat(filler
, dirp
, cpu
, buf
, 0) != 0)
346 while ((dirent
= readdir(dirp
))) {
347 char *entry
= dirent
->d_name
;
349 if (strlen(entry
) > 3) {
352 /* Don't emit entries we already filtered above. */
357 if (dirent_fillerat(filler
, dirp
, dirent
, buf
, 0) != 0)
364 static int get_st_mode(const char *path
, mode_t
*mode
)
369 ret
= lstat(path
, &sb
);
377 static off_t
get_sysfile_size(const char *which
)
379 __do_fclose
FILE *f
= NULL
;
380 __do_free
char *line
= NULL
;
382 ssize_t sz
, answer
= 0;
384 f
= fopen(which
, "re");
388 while ((sz
= getline(&line
, &len
, f
)) != -1)
394 static int sys_getattr_legacy(const char *path
, struct stat
*sb
)
398 memset(sb
, 0, sizeof(struct stat
));
399 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
402 sb
->st_uid
= sb
->st_gid
= 0;
403 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
404 if (strcmp(path
, "/sys") == 0) {
405 sb
->st_mode
= S_IFDIR
| 00555;
410 if (strcmp(path
, "/sys/devices") == 0) {
411 sb
->st_mode
= S_IFDIR
| 00555;
416 if (strcmp(path
, "/sys/devices/system") == 0) {
417 sb
->st_mode
= S_IFDIR
| 00555;
422 if (strcmp(path
, "/sys/devices/system/cpu") == 0) {
423 sb
->st_mode
= S_IFDIR
| 00555;
428 if (strcmp(path
, "/sys/devices/system/cpu/online") == 0) {
429 sb
->st_size
= sys_devices_system_cpu_online_getsize(path
);
430 sb
->st_mode
= S_IFREG
| 00444;
438 __lxcfs_fuse_ops
int sys_getattr(const char *path
, struct stat
*sb
)
444 if (!liblxcfs_functional())
447 if (!liblxcfs_can_use_sys_cpu())
448 return sys_getattr_legacy(path
, sb
);
450 memset(sb
, 0, sizeof(struct stat
));
451 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
454 sb
->st_uid
= sb
->st_gid
= 0;
455 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
457 ret
= get_st_mode(path
, &st_mode
);
461 if (S_ISDIR(st_mode
)) {
462 sb
->st_mode
= st_mode
;
467 if (S_ISREG(st_mode
) || S_ISLNK(st_mode
)) {
468 if (strcmp(path
, "/sys/devices/system/cpu/online") == 0)
469 sb
->st_size
= sys_devices_system_cpu_online_getsize(path
);
471 sb
->st_size
= get_sysfile_size(path
);
472 sb
->st_mode
= st_mode
;
480 __lxcfs_fuse_ops
int sys_release(const char *path
, struct fuse_file_info
*fi
)
482 do_release_file_info(fi
);
486 __lxcfs_fuse_ops
int sys_releasedir(const char *path
, struct fuse_file_info
*fi
)
488 do_release_file_info(fi
);
492 __lxcfs_fuse_ops
int sys_write(const char *path
, const char *buf
, size_t size
,
493 off_t offset
, struct fuse_file_info
*fi
)
495 __do_close
int fd
= -EBADF
;
496 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
498 if (!liblxcfs_functional())
501 if (f
->type
!= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE
)
504 fd
= open(path
, O_WRONLY
| O_CLOEXEC
);
508 return pwrite(fd
, buf
, size
, offset
);
511 static int sys_readdir_legacy(const char *path
, void *buf
, fuse_fill_dir_t filler
,
512 off_t offset
, struct fuse_file_info
*fi
)
514 if (strcmp(path
, "/sys") == 0) {
515 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
516 dir_filler(filler
, buf
, "..", 0) != 0 ||
517 dirent_filler(filler
, path
, "devices", buf
, 0) != 0)
522 if (strcmp(path
, "/sys/devices") == 0) {
523 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
524 dir_filler(filler
, buf
, "..", 0) != 0 ||
525 dirent_filler(filler
, path
, "system", buf
, 0) != 0)
530 if (strcmp(path
, "/sys/devices/system") == 0) {
531 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
532 dir_filler(filler
, buf
, "..", 0) != 0 ||
533 dirent_filler(filler
, path
, "cpu", buf
, 0) != 0)
538 if (strcmp(path
, "/sys/devices/system/cpu") == 0) {
539 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
540 dir_filler(filler
, buf
, "..", 0) != 0 ||
541 dirent_filler(filler
, path
, "online", buf
, 0) != 0)
550 __lxcfs_fuse_ops
int sys_readdir(const char *path
, void *buf
,
551 fuse_fill_dir_t filler
, off_t offset
,
552 struct fuse_file_info
*fi
)
554 __do_closedir
DIR *dirp
= NULL
;
555 struct dirent
*dirent
;
556 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
558 if (!liblxcfs_functional())
561 if (!liblxcfs_can_use_sys_cpu())
562 return sys_readdir_legacy(path
, buf
, filler
, offset
, fi
);
565 * When we reload LXCFS and we don't load the lxcfs binary itself
566 * changes to such functions as lxcfs_opendir() aren't reflected so
567 * sys_opendir() doesn't run but sys_readdir() does. We need to account
575 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
576 dir_filler(filler
, buf
, "..", 0) != 0 ||
577 dirent_filler(filler
, path
, "devices", buf
, 0) != 0)
580 case LXC_TYPE_SYS_DEVICES
:
581 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
582 dir_filler(filler
, buf
, "..", 0) != 0 ||
583 dirent_filler(filler
, path
, "system", buf
, 0) != 0)
586 case LXC_TYPE_SYS_DEVICES_SYSTEM
:
587 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
588 dir_filler(filler
, buf
, "..", 0) != 0 ||
589 dirent_filler(filler
, path
, "cpu", buf
, 0) != 0)
592 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU
:
593 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
594 dir_filler(filler
, buf
, "..", 0) != 0)
597 return filler_sys_devices_system_cpu(path
, buf
, filler
);
598 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR
:
599 dirp
= opathdir(path
);
603 while ((dirent
= readdir(dirp
))) {
604 if (dirent_fillerat(filler
, dirp
, dirent
, buf
, 0) != 0)
613 __lxcfs_fuse_ops
int sys_readlink(const char *path
, char *buf
, size_t size
)
617 if (!liblxcfs_functional())
620 ret
= readlink(path
, buf
, size
);
624 if ((size_t)ret
> size
)
632 static int sys_open_legacy(const char *path
, struct fuse_file_info
*fi
)
634 __do_free
struct file_info
*info
= NULL
;
637 if (strcmp(path
, "/sys/devices") == 0)
638 type
= LXC_TYPE_SYS_DEVICES
;
639 if (strcmp(path
, "/sys/devices/system") == 0)
640 type
= LXC_TYPE_SYS_DEVICES_SYSTEM
;
641 if (strcmp(path
, "/sys/devices/system/cpu") == 0)
642 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU
;
643 if (strcmp(path
, "/sys/devices/system/cpu/online") == 0)
644 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE
;
648 info
= malloc(sizeof(*info
));
652 memset(info
, 0, sizeof(*info
));
655 info
->buflen
= get_sysfile_size(path
) + BUF_RESERVE_SIZE
;
657 info
->buf
= malloc(info
->buflen
);
661 memset(info
->buf
, 0, info
->buflen
);
662 /* set actual size to buffer size */
663 info
->size
= info
->buflen
;
665 fi
->fh
= PTR_TO_UINT64(move_ptr(info
));
669 __lxcfs_fuse_ops
int sys_open(const char *path
, struct fuse_file_info
*fi
)
671 __do_free
struct file_info
*info
= NULL
;
674 if (!liblxcfs_functional())
677 if (!liblxcfs_can_use_sys_cpu())
678 return sys_open_legacy(path
, fi
);
680 if (strcmp(path
, "/sys/devices/system/cpu/online") == 0) {
681 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE
;
682 } else if (strncmp(path
, "/sys/devices/system/cpu/",
683 STRLITERALLEN("/sys/devices/system/cpu/")) == 0) {
687 ret
= get_st_mode(path
, &st_mode
);
691 if (S_ISREG(st_mode
))
692 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE
;
697 info
= malloc(sizeof(*info
));
701 memset(info
, 0, sizeof(*info
));
704 info
->buflen
= get_sysfile_size(path
) + BUF_RESERVE_SIZE
;
706 info
->buf
= malloc(info
->buflen
);
710 memset(info
->buf
, 0, info
->buflen
);
711 /* set actual size to buffer size */
712 info
->size
= info
->buflen
;
714 fi
->fh
= PTR_TO_UINT64(move_ptr(info
));
718 __lxcfs_fuse_ops
int sys_opendir(const char *path
, struct fuse_file_info
*fi
)
720 __do_free
struct file_info
*dir_info
= NULL
;
723 if (!liblxcfs_functional())
726 if (strcmp(path
, "/sys") == 0) {
728 } else if (strcmp(path
, "/sys/devices") == 0) {
729 type
= LXC_TYPE_SYS_DEVICES
;
730 } else if (strcmp(path
, "/sys/devices/system") == 0) {
731 type
= LXC_TYPE_SYS_DEVICES_SYSTEM
;
732 } else if (strcmp(path
, "/sys/devices/system/cpu") == 0) {
733 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU
;
734 } else if (strncmp(path
, "/sys/devices/system/cpu/",
735 STRLITERALLEN("/sys/devices/system/cpu/")) == 0) {
739 ret
= get_st_mode(path
, &st_mode
);
743 if (S_ISDIR(st_mode
))
744 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR
;
749 dir_info
= malloc(sizeof(*dir_info
));
753 memset(dir_info
, 0, sizeof(*dir_info
));
754 dir_info
->type
= type
;
755 dir_info
->buf
= NULL
;
756 dir_info
->file
= NULL
;
757 dir_info
->buflen
= 0;
759 fi
->fh
= PTR_TO_UINT64(move_ptr(dir_info
));
763 static int sys_access_legacy(const char *path
, int mask
)
765 if (strcmp(path
, "/sys") == 0 && access(path
, R_OK
) == 0)
768 if (strcmp(path
, "/sys/devices") == 0 && access(path
, R_OK
) == 0)
771 if (strcmp(path
, "/sys/devices/system") == 0 && access(path
, R_OK
) == 0)
774 if (strcmp(path
, "/sys/devices/system/cpu") == 0 &&
775 access(path
, R_OK
) == 0)
778 /* these are all read-only */
779 if ((mask
& ~R_OK
) != 0)
785 __lxcfs_fuse_ops
int sys_access(const char *path
, int mask
)
787 if (!liblxcfs_functional())
790 if (!liblxcfs_can_use_sys_cpu())
791 return sys_access_legacy(path
, mask
);
793 return access(path
, mask
);
796 static int sys_read_legacy(const char *path
, char *buf
, size_t size
,
797 off_t offset
, struct fuse_file_info
*fi
)
799 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
802 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE
:
803 if (liblxcfs_functional())
804 return sys_devices_system_cpu_online_read(buf
, size
, offset
, fi
);
806 return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH
,
807 buf
, size
, offset
, f
);
808 case LXC_TYPE_SYS_DEVICES
:
810 case LXC_TYPE_SYS_DEVICES_SYSTEM
:
812 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU
:
819 __lxcfs_fuse_ops
int sys_read(const char *path
, char *buf
, size_t size
,
820 off_t offset
, struct fuse_file_info
*fi
)
822 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
824 if (!liblxcfs_functional())
827 if (!liblxcfs_can_use_sys_cpu())
828 return sys_read_legacy(path
, buf
, size
, offset
, fi
);
831 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE
:
832 return sys_devices_system_cpu_online_read(buf
, size
, offset
, fi
);
833 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE
:
834 return read_file_fuse_with_offset(path
, buf
, size
, offset
, f
);