1 /* SPDX-License-Identifier: LGPL-2.1+ */
21 #include <linux/magic.h>
22 #include <linux/sched.h>
23 #include <sys/epoll.h>
25 #include <sys/mount.h>
26 #include <sys/param.h>
27 #include <sys/socket.h>
28 #include <sys/syscall.h>
29 #include <sys/sysinfo.h>
32 #include "sysfs_fuse.h"
35 #include "memory_utils.h"
36 #include "cgroups/cgroup.h"
37 #include "lxcfs_fuse_compat.h"
40 /* Create cpumask from cpulist aka turn:
/*
 * lxc_cpumask - parse a comma-separated cpu list into a freshly allocated
 * bit array.
 *
 * NOTE(review): several lines of this function are not visible in this view
 * (local declarations, NULL checks, closing braces, the final return), so
 * only the visible statements are described.
 *
 * @buf:          cpu list text, iterated token by token with
 *                lxc_iterate_parts() using "," as separator.
 * @bitarr:       out: receives the allocated __u32 array via move_ptr().
 * @last_set_bit: out: receives the largest "last bit" seen in any token.
 *
 * Each token's leading number is parsed with strtoul() as the first bit and
 * the text after a '-' is parsed as the last bit. A token with
 * first bit > last bit produces ret_errno(EINVAL). The array is sized for
 * 256 bits initially and is grown with realloc() (new tail zeroed) when a
 * last bit is >= nbits. The two ret_errno(ENOMEM) returns are presumably
 * guarded by NULL checks that are not visible here.
 */
48 static int lxc_cpumask(char *buf
, __u32
**bitarr
, __u32
*last_set_bit
)
50 __do_free __u32
*arr_u32
= NULL
;
51 __u32 cur_last_set_bit
= 0, nbits
= 256;
/* NOTE(review): element count comes from BITS_TO_LONGS() while the elements
 * are __u32 - confirm the macro divides by 32 bits. */
55 nr_u32
= BITS_TO_LONGS(nbits
);
56 arr_u32
= zalloc(nr_u32
* sizeof(__u32
));
58 return ret_errno(ENOMEM
);
60 lxc_iterate_parts(token
, buf
, ",") {
61 __u32 last_bit
, first_bit
;
/* NOTE(review): strtoul() is called with a NULL end pointer, so a malformed
 * token is not detected by these calls. */
65 first_bit
= strtoul(token
, NULL
, 0);
67 range
= strchr(token
, '-');
/* presumably only executed when strchr() found a '-' - guard not visible */
69 last_bit
= strtoul(range
+ 1, NULL
, 0);
71 if (!(first_bit
<= last_bit
))
72 return ret_errno(EINVAL
);
/* Grow the array when the range does not fit into the current nbits. */
74 if (last_bit
>= nbits
) {
75 __u32 add_bits
= last_bit
- nbits
+ 32;
79 new_nr_u32
= BITS_TO_LONGS(nbits
+ add_bits
);
80 p
= realloc(arr_u32
, new_nr_u32
* sizeof(uint32_t));
82 return ret_errno(ENOMEM
);
83 arr_u32
= move_ptr(p
);
/* Zero only the newly added words; the old contents are preserved. */
85 memset(arr_u32
+ nr_u32
, 0,
86 (new_nr_u32
- nr_u32
) * sizeof(uint32_t));
/* Mark every cpu of the inclusive range first_bit..last_bit. */
90 while (first_bit
<= last_bit
)
91 set_bit(first_bit
++, arr_u32
);
93 if (last_bit
> cur_last_set_bit
)
94 cur_last_set_bit
= last_bit
;
/* Hand the results to the caller; move_ptr() presumably disarms the
 * __do_free cleanup of arr_u32 - confirm against memory_utils.h. */
97 *last_set_bit
= cur_last_set_bit
;
98 *bitarr
= move_ptr(arr_u32
);
/*
 * lxc_cpumask_update - set or clear the bits named by a cpu list inside an
 * existing bit array.
 *
 * NOTE(review): only part of this function is visible (the tail of the
 * parameter list, several declarations and all return paths are missing).
 *
 * @buf:          comma-separated cpu list, iterated with lxc_iterate_parts().
 * @bitarr:       bit array that is modified in place.
 * @last_set_bit: compared against each range's last bit; what is done when
 *                the range exceeds it is not visible - TODO confirm.
 *
 * A `clear` flag (declared outside the visible lines) selects the direction:
 * when it is set, bits that are currently set are cleared; when it is not
 * set, bits that are currently unset are set. `flipped` presumably records
 * whether any bit changed - confirm against the missing lines.
 *
 * NOTE(review): "cup" in the debug message below looks like a typo for
 * "cpu".
 */
102 static int lxc_cpumask_update(char *buf
, __u32
*bitarr
, __u32 last_set_bit
,
105 bool flipped
= false;
108 lxc_iterate_parts(token
, buf
, ",") {
109 __u32 last_bit
, first_bit
;
/* A token without '-' describes a single cpu, so last_bit starts out equal
 * to first_bit. */
113 first_bit
= strtoul(token
, NULL
, 0);
114 last_bit
= first_bit
;
115 range
= strchr(token
, '-');
117 last_bit
= strtoul(range
+ 1, NULL
, 0);
119 if (!(first_bit
<= last_bit
)) {
120 lxcfs_debug("The cup range seems to be inverted: %u-%u", first_bit
, last_bit
);
124 if (last_bit
> last_set_bit
)
/* Walk the inclusive range and flip only bits whose state differs from the
 * requested one. */
127 while (first_bit
<= last_bit
) {
128 if (clear
&& is_set(first_bit
, bitarr
)) {
130 clear_bit(first_bit
, bitarr
);
131 } else if (!clear
&& !is_set(first_bit
, bitarr
)) {
133 set_bit(first_bit
, bitarr
);
/* Host sysfs files listing isolated and offline cpus. */
146 #define __ISOL_CPUS "/sys/devices/system/cpu/isolated"
147 #define __OFFLINE_CPUS "/sys/devices/system/cpu/offline"
/*
 * cpumask - build a cpu bit array from a cpu list and remove isolated and
 * offline cpus from it.
 *
 * NOTE(review): error checks, else branches and return statements are not
 * visible in this view.
 *
 * @posscpus:     cpu list passed to lxc_cpumask().
 * @bitarr:       out: receives the resulting bit array via move_ptr().
 * @last_set_bit: out: receives the last set bit reported by lxc_cpumask().
 *
 * The isolated and offline lists are read with read_file_at() when the
 * corresponding file exists; a list whose first character is not a digit is
 * released with free_disarm(). Both lists are then applied with
 * lxc_cpumask_update(..., true).
 */
148 static int cpumask(char *posscpus
, __u32
**bitarr
, __u32
*last_set_bit
)
150 __do_free
char *isolcpus
= NULL
, *offlinecpus
= NULL
;
151 __do_free __u32
*possmask
= NULL
;
153 __u32 poss_last_set_bit
= 0;
155 if (file_exists(__ISOL_CPUS
)) {
156 isolcpus
= read_file_at(-EBADF
, __ISOL_CPUS
, PROTECT_OPEN
);
/* NOTE(review): isdigit() receives a plain char here; a cast to
 * unsigned char is the portable form - confirm whether it matters. */
160 if (!isdigit(isolcpus
[0]))
161 free_disarm(isolcpus
);
163 lxcfs_debug("The path \""__ISOL_CPUS
"\" to read isolated cpus from does not exist");
166 if (file_exists(__OFFLINE_CPUS
)) {
167 offlinecpus
= read_file_at(-EBADF
, __OFFLINE_CPUS
, PROTECT_OPEN
);
171 if (!isdigit(offlinecpus
[0]))
172 free_disarm(offlinecpus
);
174 lxcfs_debug("The path \""__OFFLINE_CPUS
"\" to read offline cpus from does not exist");
177 ret
= lxc_cpumask(posscpus
, &possmask
, &poss_last_set_bit
);
/* Clear isolated, then offline cpus. Presumably each call is skipped when
 * the corresponding list is NULL - guards not visible. */
182 ret
= lxc_cpumask_update(isolcpus
, possmask
, poss_last_set_bit
, true);
/* NOTE(review): the second result is OR-ed into ret, so ret is a combined
 * value rather than a single return code. */
185 ret
|= lxc_cpumask_update(offlinecpus
, possmask
, poss_last_set_bit
, true);
189 *bitarr
= move_ptr(possmask
);
190 *last_set_bit
= poss_last_set_bit
;
/*
 * sys_devices_system_cpu_online_read - produce the contents of the "online"
 * cpu file for the calling process.
 *
 * NOTE(review): the parameter line declaring `offset`, several local
 * declarations, the conditions selecting between the three snprintf() calls
 * and all plain returns are not visible in this view.
 *
 * @buf:  destination buffer.
 * @size: capacity of @buf.
 * @fi:   FUSE file info; fi->fh carries the struct file_info pointer.
 *
 * Visible behavior:
 *  - a block copies min(d->size - offset, size) bytes from the cached d->buf
 *    into @buf (presumably the path for reads of already cached content);
 *  - otherwise the init pid of the caller is looked up, its "cpuset" cgroup
 *    and cpuset are resolved, and one of "0-<max_cpus - 1>\n", "0\n" or
 *    "<cpuset>\n" is formatted into d->buf;
 *  - the host file is read through read_file_fuse() in a fallback path
 *    (presumably when no cgroup could be determined).
 */
194 static int sys_devices_system_cpu_online_read(char *buf
, size_t size
,
196 struct fuse_file_info
*fi
)
198 __do_free
char *cg
= NULL
, *cpuset
= NULL
;
199 struct fuse_context
*fc
= fuse_get_context();
200 struct lxcfs_opts
*opts
= (struct lxcfs_opts
*)fc
->private_data
;
201 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
202 char *cache
= d
->buf
;
205 ssize_t total_len
= 0;
/* Serve from the cache: copy what is left after offset, capped at size. */
214 if (offset
> d
->size
)
217 left
= d
->size
- offset
;
218 total_len
= left
> size
? size
: left
;
219 memcpy(buf
, cache
+ offset
, total_len
);
224 initpid
= lookup_initpid_in_store(fc
->pid
);
225 if (initpid
<= 1 || is_shared_pidns(initpid
))
228 cg
= get_pid_cgroup(initpid
, "cpuset");
230 return read_file_fuse("/sys/devices/system/cpu/online", buf
, size
, d
);
231 prune_init_slice(cg
);
233 cpuset
= get_cpuset(cg
);
/* The cpu-view path is only considered when the cgroup layout supports it
 * and the use_cfs option is set. */
237 if (cgroup_ops
->can_use_cpuview(cgroup_ops
) && opts
&& opts
->use_cfs
)
243 max_cpus
= max_cpu_count(cg
);
247 total_len
= snprintf(d
->buf
, d
->buflen
, "0-%d\n", max_cpus
- 1);
249 total_len
= snprintf(d
->buf
, d
->buflen
, "0\n");
251 total_len
= snprintf(d
->buf
, d
->buflen
, "%s\n", cpuset
);
/* snprintf() error or truncation: logged, and 0 is passed to log_error()
 * (presumably the value returned - confirm against the macro). */
253 if (total_len
< 0 || total_len
>= d
->buflen
)
254 return log_error(0, "Failed to write to cache");
256 d
->size
= (int)total_len
;
259 if ((size_t)total_len
> size
)
262 memcpy(buf
, d
->buf
, total_len
);
/*
 * filler_sys_devices_system_cpu - emit the directory entries of the cpu
 * sysfs directory for the calling process.
 *
 * NOTE(review): local declarations, the opendir() call, the bodies of most
 * conditionals and all returns are not visible in this view.
 *
 * @path:   directory path (its use is not visible here).
 * @buf:    opaque FUSE directory buffer handed to DIR_FILLER().
 * @filler: FUSE fill callback.
 *
 * Visible behavior: the caller's init pid, "cpuset" cgroup and cpuset are
 * resolved and converted to a bit array with cpumask(). For every bit from 0
 * to last_set_bit an entry named "cpu<bit>" is formatted and passed to
 * DIR_FILLER() (bits that are not set are presumably skipped). Afterwards
 * the entries returned by readdir() are passed to DIR_FILLER() too, with a
 * filter whose details are not visible.
 */
267 static int filler_sys_devices_system_cpu(const char *path
, void *buf
,
268 fuse_fill_dir_t filler
)
270 __do_free __u32
*bitarr
= NULL
;
271 __do_free
char *cg
= NULL
, *cpuset
= NULL
;
272 __do_closedir
DIR *dir
= NULL
;
273 struct fuse_context
*fc
= fuse_get_context();
274 __u32 last_set_bit
= 0;
276 struct dirent
*dirent
;
279 initpid
= lookup_initpid_in_store(fc
->pid
);
280 if (initpid
<= 1 || is_shared_pidns(initpid
))
283 cg
= get_pid_cgroup(initpid
, "cpuset");
286 prune_init_slice(cg
);
288 cpuset
= get_cpuset(cg
);
292 ret
= cpumask(cpuset
, &bitarr
, &last_set_bit
);
/* First pass: one "cpu<N>" entry per bit of the mask. */
296 for (__u32 bit
= 0; bit
<= last_set_bit
; bit
++) {
299 if (!is_set(bit
, bitarr
))
302 ret
= snprintf(cpu
, sizeof(cpu
), "cpu%u", bit
);
303 if (ret
< 0 || (size_t)ret
>= sizeof(cpu
))
306 if (DIR_FILLER(filler
, buf
, cpu
, NULL
, 0) != 0)
/* Second pass: remaining entries of the real directory. */
314 while ((dirent
= readdir(dir
))) {
315 char *entry
= dirent
->d_name
;
317 if (strlen(entry
) <= 3)
321 /* Don't emit entries we already filtered above. */
325 if (DIR_FILLER(filler
, buf
, dirent
->d_name
, NULL
, 0) != 0)
/*
 * get_st_mode - query the file mode of @path.
 *
 * Calls lstat() on @path, so a symbolic link itself is examined rather than
 * its target. The mode is presumably stored in *@mode and lstat() failures
 * reported to the caller - those lines are not visible here.
 */
332 static int get_st_mode(const char *path
, mode_t
*mode
)
337 ret
= lstat(path
, &sb
);
/*
 * get_sysfile_size - determine the size of a sysfs file by reading it.
 *
 * Opens @which with fopen() in mode "re" and reads it line by line with
 * getline() until -1 is returned. `answer` starts at 0 and presumably
 * accumulates the line lengths and is returned - the loop body and the
 * return are not visible here.
 */
345 static off_t
get_sysfile_size(const char *which
)
347 __do_fclose
FILE *f
= NULL
;
348 __do_free
char *line
= NULL
;
350 ssize_t sz
, answer
= 0;
352 f
= fopen(which
, "re");
356 while ((sz
= getline(&line
, &len
, f
)) != -1)
/*
 * sys_getattr_legacy - fill @sb for the fixed set of paths known to the
 * legacy sysfs view.
 *
 * @sb is zeroed, owner and group are set to 0 and all three timestamps are
 * set to the current CLOCK_REALTIME time. "/sys", "/sys/devices",
 * "/sys/devices/system" and "/sys/devices/system/cpu" are reported as
 * directories with mode 0555; "/sys/devices/system/cpu/online" is reported as
 * a regular file with mode 0444 and the size from get_sysfile_size().
 *
 * NOTE(review): remaining per-path fields, the return statements and the
 * result for unknown paths are not visible in this view.
 */
362 static int sys_getattr_legacy(const char *path
, struct stat
*sb
)
366 memset(sb
, 0, sizeof(struct stat
));
367 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
370 sb
->st_uid
= sb
->st_gid
= 0;
371 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
372 if (strcmp(path
, "/sys") == 0) {
373 sb
->st_mode
= S_IFDIR
| 00555;
378 if (strcmp(path
, "/sys/devices") == 0) {
379 sb
->st_mode
= S_IFDIR
| 00555;
384 if (strcmp(path
, "/sys/devices/system") == 0) {
385 sb
->st_mode
= S_IFDIR
| 00555;
390 if (strcmp(path
, "/sys/devices/system/cpu") == 0) {
391 sb
->st_mode
= S_IFDIR
| 00555;
396 if (strcmp(path
, "/sys/devices/system/cpu/online") == 0) {
397 sb
->st_size
= get_sysfile_size (path
);
398 sb
->st_mode
= S_IFREG
| 00444;
/*
 * sys_getattr - FUSE getattr handler for the sysfs view.
 *
 * Delegates to sys_getattr_legacy() when liblxcfs_can_use_sys_cpu() is
 * false. Otherwise @sb is zeroed, owner/group are set to 0, the timestamps
 * are set to the current CLOCK_REALTIME time, and the mode obtained from
 * get_st_mode() is copied into @sb: directories get the mode only, regular
 * files and symbolic links additionally get their size from
 * get_sysfile_size().
 *
 * NOTE(review): the values returned on the individual paths (including when
 * liblxcfs_functional() is false) are not visible in this view.
 */
406 __lxcfs_fuse_ops
int sys_getattr(const char *path
, struct stat
*sb
)
412 if (!liblxcfs_functional())
415 if (!liblxcfs_can_use_sys_cpu())
416 return sys_getattr_legacy(path
, sb
);
418 memset(sb
, 0, sizeof(struct stat
));
419 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
422 sb
->st_uid
= sb
->st_gid
= 0;
423 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
425 ret
= get_st_mode(path
, &st_mode
);
429 if (S_ISDIR(st_mode
)) {
430 sb
->st_mode
= st_mode
;
435 if (S_ISREG(st_mode
) || S_ISLNK(st_mode
)) {
436 sb
->st_size
= get_sysfile_size(path
);
437 sb
->st_mode
= st_mode
;
/*
 * sys_release - FUSE release handler: hands @fi to do_release_file_info().
 * @path is not used in the visible lines; the return value is not visible.
 */
445 __lxcfs_fuse_ops
int sys_release(const char *path
, struct fuse_file_info
*fi
)
447 do_release_file_info(fi
);
/*
 * sys_releasedir - FUSE releasedir handler: hands @fi to
 * do_release_file_info(), the same helper sys_release() uses.
 * @path is not used in the visible lines; the return value is not visible.
 */
451 __lxcfs_fuse_ops
int sys_releasedir(const char *path
, struct fuse_file_info
*fi
)
453 do_release_file_info(fi
);
/*
 * sys_write - FUSE write handler for files below the cpu sysfs directory.
 *
 * The file type stored in the file_info behind fi->fh is compared against
 * LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE (the result for other types is not
 * visible). The real @path is then opened write-only with O_CLOEXEC and
 * @size bytes from @buf are written at @offset with pwrite(); the descriptor
 * is declared __do_close.
 *
 * NOTE(review): the pwrite() result is returned as is, so a failure yields
 * -1 rather than a negated errno, and the ssize_t result is narrowed to
 * int - confirm this is what the FUSE layer expects.
 */
457 __lxcfs_fuse_ops
int sys_write(const char *path
, const char *buf
, size_t size
,
458 off_t offset
, struct fuse_file_info
*fi
)
460 __do_close
int fd
= -EBADF
;
461 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
463 if (!liblxcfs_functional())
466 if (f
->type
!= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE
)
469 fd
= open(path
, O_WRONLY
| O_CLOEXEC
);
473 return pwrite(fd
, buf
, size
, offset
);
/*
 * sys_readdir_legacy - list the fixed directory hierarchy of the legacy
 * sysfs view.
 *
 * Every known directory gets ".", ".." and exactly one child:
 *   /sys                     -> devices
 *   /sys/devices             -> system
 *   /sys/devices/system      -> cpu
 *   /sys/devices/system/cpu  -> online
 * The DIR_FILLER() calls are chained with ||, so the first one returning
 * non-zero stops the chain. @offset and @fi are not used in the visible
 * lines.
 *
 * NOTE(review): the return values (filler failure, success, unknown path)
 * are not visible in this view.
 */
476 static int sys_readdir_legacy(const char *path
, void *buf
, fuse_fill_dir_t filler
,
477 off_t offset
, struct fuse_file_info
*fi
)
479 if (strcmp(path
, "/sys") == 0) {
480 if (DIR_FILLER(filler
, buf
, ".", NULL
, 0) != 0 ||
481 DIR_FILLER(filler
, buf
, "..", NULL
, 0) != 0 ||
482 DIR_FILLER(filler
, buf
, "devices", NULL
, 0) != 0)
487 if (strcmp(path
, "/sys/devices") == 0) {
488 if (DIR_FILLER(filler
, buf
, ".", NULL
, 0) != 0 ||
489 DIR_FILLER(filler
, buf
, "..", NULL
, 0) != 0 ||
490 DIR_FILLER(filler
, buf
, "system", NULL
, 0) != 0)
495 if (strcmp(path
, "/sys/devices/system") == 0) {
496 if (DIR_FILLER(filler
, buf
, ".", NULL
, 0) != 0 ||
497 DIR_FILLER(filler
, buf
, "..", NULL
, 0) != 0 ||
498 DIR_FILLER(filler
, buf
, "cpu", NULL
, 0) != 0)
503 if (strcmp(path
, "/sys/devices/system/cpu") == 0) {
504 if (DIR_FILLER(filler
, buf
, ".", NULL
, 0) != 0 ||
505 DIR_FILLER(filler
, buf
, "..", NULL
, 0) != 0 ||
506 DIR_FILLER(filler
, buf
, "online", NULL
, 0) != 0)
/*
 * sys_readdir - FUSE readdir handler for the sysfs view.
 *
 * Delegates to sys_readdir_legacy() when liblxcfs_can_use_sys_cpu() is
 * false. Otherwise the entries depend on a type value (the switch statement
 * and its first case label are not visible; presumably the type stored in
 * the file_info behind fi->fh):
 *   - the first visible branch emits ".", "..", "devices";
 *   - LXC_TYPE_SYS_DEVICES emits ".", "..", "system";
 *   - LXC_TYPE_SYS_DEVICES_SYSTEM emits ".", "..", "cpu";
 *   - LXC_TYPE_SYS_DEVICES_SYSTEM_CPU emits "." and ".." and then hands over
 *     to filler_sys_devices_system_cpu();
 *   - LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR forwards every readdir() entry
 *     of `dir` (the opendir() call is not visible).
 *
 * NOTE(review): return values and the handling of other types are not
 * visible in this view.
 */
515 __lxcfs_fuse_ops
int sys_readdir(const char *path
, void *buf
,
516 fuse_fill_dir_t filler
, off_t offset
,
517 struct fuse_file_info
*fi
)
519 __do_closedir
DIR *dir
= NULL
;
520 struct dirent
*dirent
;
521 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
523 if (!liblxcfs_functional())
526 if (!liblxcfs_can_use_sys_cpu())
527 return sys_readdir_legacy(path
, buf
, filler
, offset
, fi
);
530 * When we reload LXCFS and we don't load the lxcfs binary itself
531 * changes to such functions as lxcfs_opendir() aren't reflected so
532 * sys_opendir() doesn't run but sys_readdir() does. We need to account
540 if (DIR_FILLER(filler
, buf
, ".", NULL
, 0) != 0 ||
541 DIR_FILLER(filler
, buf
, "..", NULL
, 0) != 0 ||
542 DIR_FILLER(filler
, buf
, "devices", NULL
, 0) != 0)
547 case LXC_TYPE_SYS_DEVICES
: {
548 if (DIR_FILLER(filler
, buf
, ".", NULL
, 0) != 0 ||
549 DIR_FILLER(filler
, buf
, "..", NULL
, 0) != 0 ||
550 DIR_FILLER(filler
, buf
, "system", NULL
, 0) != 0)
555 case LXC_TYPE_SYS_DEVICES_SYSTEM
: {
556 if (DIR_FILLER(filler
, buf
, ".", NULL
, 0) != 0 ||
557 DIR_FILLER(filler
, buf
, "..", NULL
, 0) != 0 ||
558 DIR_FILLER(filler
, buf
, "cpu", NULL
, 0) != 0)
563 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU
:
564 if (DIR_FILLER(filler
, buf
, ".", NULL
, 0) != 0 ||
565 DIR_FILLER(filler
, buf
, "..", NULL
, 0) != 0)
568 return filler_sys_devices_system_cpu(path
, buf
, filler
);
569 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR
: {
574 while ((dirent
= readdir(dir
))) {
575 if (DIR_FILLER(filler
, buf
, dirent
->d_name
, NULL
, 0) != 0)
/*
 * sys_readlink - FUSE readlink handler: resolves @path with readlink() into
 * @buf of capacity @size.
 *
 * NOTE(review): readlink() never returns more than the size it was given,
 * so the `(size_t)ret > size` test below cannot be true. readlink() also
 * does not NUL-terminate; if a terminator is written at buf[ret] in the
 * lines not visible here, ret == size would write one byte past the
 * buffer - confirm.
 */
586 __lxcfs_fuse_ops
int sys_readlink(const char *path
, char *buf
, size_t size
)
590 if (!liblxcfs_functional())
593 ret
= readlink(path
, buf
, size
);
597 if ((size_t)ret
> size
)
/*
 * sys_open_legacy - open handler of the legacy sysfs view.
 *
 * Maps @path to one of the LXC_TYPE_SYS_DEVICES* type constants, allocates a
 * zeroed struct file_info, gives it a zeroed buffer of
 * get_sysfile_size(path) + BUF_RESERVE_SIZE bytes, sets its size to the
 * buffer length and stores the pointer in fi->fh. move_ptr() presumably
 * disarms the __do_free cleanup so the object outlives this function -
 * confirm against memory_utils.h.
 *
 * NOTE(review): the handling of unknown paths, the allocation failure
 * checks, the assignment of the type to the file_info and the return values
 * are not visible in this view.
 */
605 static int sys_open_legacy(const char *path
, struct fuse_file_info
*fi
)
607 __do_free
struct file_info
*info
= NULL
;
610 if (strcmp(path
, "/sys/devices") == 0)
611 type
= LXC_TYPE_SYS_DEVICES
;
612 if (strcmp(path
, "/sys/devices/system") == 0)
613 type
= LXC_TYPE_SYS_DEVICES_SYSTEM
;
614 if (strcmp(path
, "/sys/devices/system/cpu") == 0)
615 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU
;
616 if (strcmp(path
, "/sys/devices/system/cpu/online") == 0)
617 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE
;
621 info
= malloc(sizeof(*info
));
625 memset(info
, 0, sizeof(*info
));
628 info
->buflen
= get_sysfile_size(path
) + BUF_RESERVE_SIZE
;
630 info
->buf
= malloc(info
->buflen
);
634 memset(info
->buf
, 0, info
->buflen
);
635 /* set actual size to buffer size */
636 info
->size
= info
->buflen
;
638 fi
->fh
= PTR_TO_UINT64(move_ptr(info
));
/*
 * sys_open - FUSE open handler for the sysfs view.
 *
 * Delegates to sys_open_legacy() when liblxcfs_can_use_sys_cpu() is false.
 * Otherwise "/sys/devices/system/cpu/online" is typed as
 * LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE, and any other path below
 * "/sys/devices/system/cpu/" that get_st_mode() reports as a regular file is
 * typed as LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE. A zeroed struct
 * file_info with a zeroed buffer of get_sysfile_size(path) +
 * BUF_RESERVE_SIZE bytes is then allocated and stored in fi->fh.
 *
 * NOTE(review): the handling of other paths, the allocation failure checks,
 * the assignment of the type to the file_info and the return values are not
 * visible in this view.
 */
642 __lxcfs_fuse_ops
int sys_open(const char *path
, struct fuse_file_info
*fi
)
644 __do_free
struct file_info
*info
= NULL
;
647 if (!liblxcfs_functional())
650 if (!liblxcfs_can_use_sys_cpu())
651 return sys_open_legacy(path
, fi
);
653 if (strcmp(path
, "/sys/devices/system/cpu/online") == 0) {
654 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE
;
655 } else if (strncmp(path
, "/sys/devices/system/cpu/",
656 STRLITERALLEN("/sys/devices/system/cpu/")) == 0) {
660 ret
= get_st_mode(path
, &st_mode
);
664 if (S_ISREG(st_mode
))
665 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE
;
670 info
= malloc(sizeof(*info
));
674 memset(info
, 0, sizeof(*info
));
677 info
->buflen
= get_sysfile_size(path
) + BUF_RESERVE_SIZE
;
679 info
->buf
= malloc(info
->buflen
);
683 memset(info
->buf
, 0, info
->buflen
);
684 /* set actual size to buffer size */
685 info
->size
= info
->buflen
;
687 fi
->fh
= PTR_TO_UINT64(move_ptr(info
));
/*
 * sys_opendir - FUSE opendir handler for the sysfs view.
 *
 * Maps @path to a directory type: the fixed directories "/sys/devices",
 * "/sys/devices/system" and "/sys/devices/system/cpu" have their own
 * constants, and any other path below "/sys/devices/system/cpu/" that
 * get_st_mode() reports as a directory becomes
 * LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR. The type assigned for "/sys" is on
 * a line not visible here. A zeroed struct file_info carrying the type (no
 * buffer, no file) is allocated and stored in fi->fh.
 *
 * NOTE(review): the handling of unknown paths, the allocation failure check
 * and the return values are not visible in this view.
 */
691 __lxcfs_fuse_ops
int sys_opendir(const char *path
, struct fuse_file_info
*fi
)
693 __do_free
struct file_info
*dir_info
= NULL
;
696 if (!liblxcfs_functional())
699 if (strcmp(path
, "/sys") == 0) {
701 } else if (strcmp(path
, "/sys/devices") == 0) {
702 type
= LXC_TYPE_SYS_DEVICES
;
703 } else if (strcmp(path
, "/sys/devices/system") == 0) {
704 type
= LXC_TYPE_SYS_DEVICES_SYSTEM
;
705 } else if (strcmp(path
, "/sys/devices/system/cpu") == 0) {
706 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU
;
707 } else if (strncmp(path
, "/sys/devices/system/cpu/",
708 STRLITERALLEN("/sys/devices/system/cpu/")) == 0) {
712 ret
= get_st_mode(path
, &st_mode
);
716 if (S_ISDIR(st_mode
))
717 type
= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBDIR
;
722 dir_info
= malloc(sizeof(*dir_info
));
/* The explicit NULL/0 assignments below repeat what the memset() already
 * established. */
726 memset(dir_info
, 0, sizeof(*dir_info
));
727 dir_info
->type
= type
;
728 dir_info
->buf
= NULL
;
729 dir_info
->file
= NULL
;
730 dir_info
->buflen
= 0;
732 fi
->fh
= PTR_TO_UINT64(move_ptr(dir_info
));
/*
 * sys_access_legacy - access check of the legacy sysfs view.
 *
 * For the four fixed directories ("/sys", "/sys/devices",
 * "/sys/devices/system", "/sys/devices/system/cpu") the real path is probed
 * with access(path, R_OK), independent of @mask. For everything else @mask
 * is tested for bits other than R_OK, because the exposed files are
 * read-only.
 *
 * NOTE(review): the values returned by each branch are not visible in this
 * view.
 */
736 static int sys_access_legacy(const char *path
, int mask
)
738 if (strcmp(path
, "/sys") == 0 && access(path
, R_OK
) == 0)
741 if (strcmp(path
, "/sys/devices") == 0 && access(path
, R_OK
) == 0)
744 if (strcmp(path
, "/sys/devices/system") == 0 && access(path
, R_OK
) == 0)
747 if (strcmp(path
, "/sys/devices/system/cpu") == 0 &&
748 access(path
, R_OK
) == 0)
751 /* these are all read-only */
752 if ((mask
& ~R_OK
) != 0)
/*
 * sys_access - FUSE access handler for the sysfs view.
 *
 * Delegates to sys_access_legacy() when liblxcfs_can_use_sys_cpu() is false;
 * otherwise the result of access(path, mask) on the real path is returned
 * unchanged. The value returned when liblxcfs_functional() is false is not
 * visible here.
 *
 * NOTE(review): access() reports failure as -1 with errno set, not as a
 * negated errno - confirm this is what the FUSE layer expects.
 */
758 __lxcfs_fuse_ops
int sys_access(const char *path
, int mask
)
760 if (!liblxcfs_functional())
763 if (!liblxcfs_can_use_sys_cpu())
764 return sys_access_legacy(path
, mask
);
766 return access(path
, mask
);
/*
 * sys_read_legacy - read handler of the legacy sysfs view.
 *
 * For LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE the per-caller content from
 * sys_devices_system_cpu_online_read() is returned when
 * liblxcfs_functional() is true; otherwise the file named by
 * LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH is read with
 * read_file_fuse_with_offset(). The directory types are listed as separate
 * cases.
 *
 * NOTE(review): the switch expression (presumably f->type) and the results
 * of the directory cases and of the default case are not visible in this
 * view.
 */
769 static int sys_read_legacy(const char *path
, char *buf
, size_t size
,
770 off_t offset
, struct fuse_file_info
*fi
)
772 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
775 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE
:
776 if (liblxcfs_functional())
777 return sys_devices_system_cpu_online_read(buf
, size
, offset
, fi
);
779 return read_file_fuse_with_offset(LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH
,
780 buf
, size
, offset
, f
);
781 case LXC_TYPE_SYS_DEVICES
:
783 case LXC_TYPE_SYS_DEVICES_SYSTEM
:
785 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU
:
/*
 * sys_read - FUSE read handler for the sysfs view.
 *
 * Delegates to sys_read_legacy() when liblxcfs_can_use_sys_cpu() is false.
 * Otherwise LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE is served by
 * sys_devices_system_cpu_online_read() and
 * LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE is read from the real @path with
 * read_file_fuse_with_offset().
 *
 * NOTE(review): the switch expression (presumably f->type), the value
 * returned when liblxcfs_functional() is false and the end of the function
 * (remaining cases, default result) are not visible in this view.
 */
792 __lxcfs_fuse_ops
int sys_read(const char *path
, char *buf
, size_t size
,
793 off_t offset
, struct fuse_file_info
*fi
)
795 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
797 if (!liblxcfs_functional())
800 if (!liblxcfs_can_use_sys_cpu())
801 return sys_read_legacy(path
, buf
, size
, offset
, fi
);
804 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE
:
805 return sys_devices_system_cpu_online_read(buf
, size
, offset
, fi
);
806 case LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_SUBFILE
:
807 return read_file_fuse_with_offset(path
, buf
, size
, offset
, f
);