1 /* SPDX-License-Identifier: LGPL-2.1+ */
21 #include <linux/magic.h>
22 #include <linux/sched.h>
23 #include <sys/epoll.h>
25 #include <sys/mount.h>
26 #include <sys/param.h>
27 #include <sys/personality.h>
28 #include <sys/socket.h>
29 #include <sys/syscall.h>
30 #include <sys/sysinfo.h>
33 #include "proc_fuse.h"
36 #include "cgroup_fuse.h"
37 #include "cgroups/cgroup.h"
38 #include "cgroups/cgroup_utils.h"
39 #include "cpuset_parse.h"
40 #include "lxcfs_fuse_compat.h"
41 #include "memory_utils.h"
42 #include "proc_loadavg.h"
43 #include "proc_cpuview.h"
47 uint64_t hierarchical_memory_limit
;
48 uint64_t hierarchical_memsw_limit
;
51 uint64_t total_rss_huge
;
53 uint64_t total_mapped_file
;
55 uint64_t total_writeback
;
57 uint64_t total_pgpgin
;
58 uint64_t total_pgpgout
;
59 uint64_t total_pgfault
;
60 uint64_t total_pgmajfault
;
61 uint64_t total_inactive_anon
;
62 uint64_t total_active_anon
;
63 uint64_t total_inactive_file
;
64 uint64_t total_active_file
;
65 uint64_t total_unevictable
;
68 static off_t
get_procfile_size(const char *path
)
70 __do_fclose
FILE *f
= NULL
;
71 __do_free
char *line
= NULL
;
73 ssize_t sz
, answer
= 0;
75 f
= fopen(path
, "re");
79 while ((sz
= getline(&line
, &len
, f
)) != -1)
85 static off_t
get_procfile_size_with_personality(const char *path
)
87 struct fuse_context
*fc
= fuse_get_context();
88 __u32 host_personality
= liblxcfs_personality(), caller_personality
;
89 bool change_personality
;
91 off_t procfile_size_ret
;
93 if (get_task_personality(fc
->pid
, &caller_personality
) < 0)
94 return log_error(0, "Failed to get caller process (pid: %d) personality", fc
->pid
);
96 /* do we need to change thread personality? */
97 change_personality
= host_personality
!= caller_personality
;
99 if (change_personality
) {
100 ret
= personality(caller_personality
);
102 return log_error(0, "Call to personality(%d) failed: %s\n",
103 caller_personality
, strerror(errno
));
105 lxcfs_debug("task (tid: %d) personality was changed %d -> %d\n",
106 (int)syscall(SYS_gettid
), ret
, caller_personality
);
109 procfile_size_ret
= get_procfile_size(path
);
111 if (change_personality
) {
112 ret
= personality(host_personality
);
114 return log_error(0, "Call to personality(%d) failed: %s\n",
115 host_personality
, strerror(errno
));
117 lxcfs_debug("task (tid: %d) personality was restored %d -> %d\n",
118 (int)syscall(SYS_gettid
), ret
, host_personality
);
121 return procfile_size_ret
;
124 __lxcfs_fuse_ops
int proc_getattr(const char *path
, struct stat
*sb
)
128 memset(sb
, 0, sizeof(struct stat
));
129 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
132 sb
->st_uid
= sb
->st_gid
= 0;
133 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
134 if (strcmp(path
, "/proc") == 0) {
135 sb
->st_mode
= S_IFDIR
| 00555;
140 if (strcmp(path
, "/proc/meminfo") == 0 ||
141 strcmp(path
, "/proc/cpuinfo") == 0 ||
142 strcmp(path
, "/proc/uptime") == 0 ||
143 strcmp(path
, "/proc/stat") == 0 ||
144 strcmp(path
, "/proc/diskstats") == 0 ||
145 strcmp(path
, "/proc/swaps") == 0 ||
146 strcmp(path
, "/proc/loadavg") == 0 ||
147 strcmp(path
, "/proc/slabinfo") == 0) {
148 if (liblxcfs_functional())
149 sb
->st_size
= get_procfile_size_with_personality(path
);
151 sb
->st_size
= get_procfile_size(path
);
152 sb
->st_mode
= S_IFREG
| 00444;
160 __lxcfs_fuse_ops
int proc_readdir(const char *path
, void *buf
,
161 fuse_fill_dir_t filler
, off_t offset
,
162 struct fuse_file_info
*fi
)
164 if (dir_filler(filler
, buf
, ".", 0) != 0 ||
165 dir_filler(filler
, buf
, "..", 0) != 0 ||
166 dir_filler(filler
, buf
, "cpuinfo", 0) != 0 ||
167 dir_filler(filler
, buf
, "meminfo", 0) != 0 ||
168 dir_filler(filler
, buf
, "stat", 0) != 0 ||
169 dir_filler(filler
, buf
, "uptime", 0) != 0 ||
170 dir_filler(filler
, buf
, "diskstats", 0) != 0 ||
171 dir_filler(filler
, buf
, "swaps", 0) != 0 ||
172 dir_filler(filler
, buf
, "loadavg", 0) != 0 ||
173 dir_filler(filler
, buf
, "slabinfo", 0) != 0)
179 __lxcfs_fuse_ops
int proc_open(const char *path
, struct fuse_file_info
*fi
)
181 __do_free
struct file_info
*info
= NULL
;
184 if (strcmp(path
, "/proc/meminfo") == 0)
185 type
= LXC_TYPE_PROC_MEMINFO
;
186 else if (strcmp(path
, "/proc/cpuinfo") == 0)
187 type
= LXC_TYPE_PROC_CPUINFO
;
188 else if (strcmp(path
, "/proc/uptime") == 0)
189 type
= LXC_TYPE_PROC_UPTIME
;
190 else if (strcmp(path
, "/proc/stat") == 0)
191 type
= LXC_TYPE_PROC_STAT
;
192 else if (strcmp(path
, "/proc/diskstats") == 0)
193 type
= LXC_TYPE_PROC_DISKSTATS
;
194 else if (strcmp(path
, "/proc/swaps") == 0)
195 type
= LXC_TYPE_PROC_SWAPS
;
196 else if (strcmp(path
, "/proc/loadavg") == 0)
197 type
= LXC_TYPE_PROC_LOADAVG
;
198 else if (strcmp(path
, "/proc/slabinfo") == 0)
199 type
= LXC_TYPE_PROC_SLABINFO
;
203 info
= zalloc(sizeof(*info
));
209 if (liblxcfs_functional())
210 info
->buflen
= get_procfile_size_with_personality(path
) + BUF_RESERVE_SIZE
;
212 info
->buflen
= get_procfile_size(path
) + BUF_RESERVE_SIZE
;
214 info
->buf
= zalloc(info
->buflen
);
217 /* set actual size to buffer size */
218 info
->size
= info
->buflen
;
220 fi
->fh
= PTR_TO_UINT64(move_ptr(info
));
224 __lxcfs_fuse_ops
int proc_access(const char *path
, int mask
)
226 if (strcmp(path
, "/proc") == 0 && access(path
, R_OK
) == 0)
229 /* these are all read-only */
230 if ((mask
& ~R_OK
) != 0)
236 __lxcfs_fuse_ops
int proc_release(const char *path
, struct fuse_file_info
*fi
)
238 do_release_file_info(fi
);
243 * Gets a non-hierarchical memory controller limit, or UINT64_MAX if no limit is
244 * in place. If `swap` is true, reads 'swap' (v2) or 'memsw' (v1); otherwise
245 * reads the memory (RAM) limits.
247 * @returns 0 on success (and sets `*limit`), < 0 on error
249 static int get_memlimit(const char *cgroup
, bool swap
, uint64_t *limit
)
251 __do_free
char *memlimit_str
= NULL
;
252 uint64_t memlimit
= UINT64_MAX
;
256 ret
= cgroup_ops
->get_memory_swap_max(cgroup_ops
, cgroup
, &memlimit_str
);
258 ret
= cgroup_ops
->get_memory_max(cgroup_ops
, cgroup
, &memlimit_str
);
263 if (memlimit_str
[0]) {
264 ret
= safe_uint64(memlimit_str
, &memlimit
, 10);
266 lxcfs_error("Failed to convert memory%s.max=%s for cgroup %s",
267 swap
? ".swap" : "", memlimit_str
, cgroup
);
276 * This function is taken from glibc-2.32, as POSIX dirname("/some-dir") will
277 * return "/some-dir" as opposed to "/", which breaks `get_min_memlimit()`
279 static char *gnu_dirname(char *path
)
281 static const char dot
[] = ".";
285 last_slash
= path
!= NULL
? strrchr(path
, '/') : NULL
;
287 if (last_slash
!= NULL
&& last_slash
!= path
&& last_slash
[1] == '\0') {
288 /* Determine whether all remaining characters are slashes. */
291 for (runp
= last_slash
; runp
!= path
; --runp
)
295 /* The '/' is the last character, we have to look further. */
297 last_slash
= memrchr(path
, '/', runp
- path
);
300 if (last_slash
!= NULL
) {
301 /* Determine whether all remaining characters are slashes. */
304 for (runp
= last_slash
; runp
!= path
; --runp
)
308 /* Terminate the path. */
311 * The last slash is the first character in the string.
312 * We have to return "/". As a special case we have to
313 * return "//" if there are exactly two slashes at the
314 * beginning of the string. See XBD 4.10 Path Name
315 * Resolution for more information
317 if (last_slash
== path
+ 1)
320 last_slash
= path
+ 1;
324 last_slash
[0] = '\0';
327 * This assignment is ill-designed but the XPG specs require to
328 * return a string containing "." in any case no directory part
329 * is found and so a static and constant string is required.
338 * Gets a hierarchical memory controller limit, or UINT64_MAX if no limit is
339 * in place. If `swap` is true, reads 'swap' (v2) or 'memsw' (v1); otherwise
340 * reads the memory (RAM) limits.
342 * @returns 0 on success (and sets `*limit`), < 0 on error
344 static int get_min_memlimit(const char *cgroup
, bool swap
, uint64_t *limit
)
346 __do_free
char *copy
= NULL
;
347 uint64_t memlimit
= UINT64_MAX
, retlimit
= UINT64_MAX
;
350 copy
= strdup(cgroup
);
352 return log_error_errno(0, ENOMEM
, "Failed to allocate memory");
354 ret
= get_memlimit(copy
, swap
, &retlimit
);
359 * If the cgroup doesn't start with / (probably won't happen), dirname()
360 * will terminate with "" instead of "/"
362 while (retlimit
!= 0 && *copy
&& strcmp(copy
, "/") != 0) {
365 it
= gnu_dirname(it
);
366 ret
= get_memlimit(it
, swap
, &memlimit
);
369 if (memlimit
< retlimit
)
/*
 * Report whether the string `line` begins with the prefix `pref`.
 *
 * Only the first strlen(pref) bytes of `line` are compared, so an empty
 * prefix always matches and a `line` shorter than `pref` never does.
 *
 * @returns true if `line` starts with `pref`, false otherwise
 */
static inline bool startswith(const char *line, const char *pref)
{
	return strncmp(line, pref, strlen(pref)) == 0;
}
382 static void get_swap_info(const char *cgroup
, uint64_t memlimit
,
383 uint64_t memusage
, uint64_t *swtotal
,
384 uint64_t *swusage
, uint64_t *memswpriority
)
386 __do_free
char *memswusage_str
= NULL
, *memswpriority_str
= NULL
;
387 uint64_t memswlimit
= 0, memswusage
= 0;
390 *swtotal
= *swusage
= 0;
393 ret
= get_min_memlimit(cgroup
, true, &memswlimit
);
396 ret
= cgroup_ops
->get_memory_swap_current(cgroup_ops
, cgroup
, &memswusage_str
);
397 if (ret
< 0 || safe_uint64(memswusage_str
, &memswusage
, 10) < 0)
400 if (liblxcfs_memory_is_cgroupv2()) {
401 *swtotal
= memswlimit
/ 1024;
402 *swusage
= memswusage
/ 1024;
404 if (memlimit
> memswlimit
)
407 *swtotal
= (memswlimit
- memlimit
) / 1024;
408 if (memusage
> memswusage
|| *swtotal
== 0)
411 *swusage
= (memswusage
- memusage
) / 1024;
414 ret
= cgroup_ops
->get_memory_swappiness(cgroup_ops
, cgroup
, &memswpriority_str
);
416 safe_uint64(memswpriority_str
, memswpriority
, 10);
419 static int proc_swaps_read(char *buf
, size_t size
, off_t offset
,
420 struct fuse_file_info
*fi
)
422 __do_free
char *cgroup
= NULL
, *memusage_str
= NULL
,
423 *memswusage_str
= NULL
, *memswpriority_str
= NULL
;
424 struct fuse_context
*fc
= fuse_get_context();
425 bool wants_swap
= lxcfs_has_opt(fuse_get_context()->private_data
, LXCFS_SWAP_ON
);
426 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
427 uint64_t memlimit
= 0, memusage
= 0,
428 swtotal
= 0, swusage
= 0, memswpriority
= 1,
429 hostswtotal
= 0, hostswfree
= 0;
430 ssize_t total_len
= 0;
432 char *cache
= d
->buf
;
434 __do_free
char *line
= NULL
;
435 __do_free
void *fopen_cache
= NULL
;
436 __do_fclose
FILE *f
= NULL
;
442 if (offset
> d
->size
)
448 left
= d
->size
- offset
;
449 total_len
= left
> size
? size
: left
;
450 memcpy(buf
, cache
+ offset
, total_len
);
455 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
456 if (initpid
<= 1 || is_shared_pidns(initpid
))
459 cgroup
= get_pid_cgroup(initpid
, "memory");
461 return read_file_fuse("/proc/swaps", buf
, size
, d
);
462 prune_init_slice(cgroup
);
464 ret
= get_min_memlimit(cgroup
, false, &memlimit
);
467 ret
= cgroup_ops
->get_memory_current(cgroup_ops
, cgroup
, &memusage_str
);
470 if (safe_uint64(memusage_str
, &memusage
, 10) < 0)
471 lxcfs_error("Failed to convert memusage %s", memusage_str
);
474 get_swap_info(cgroup
, memlimit
, memusage
, &swtotal
, &swusage
, &memswpriority
);
476 total_len
= snprintf(d
->buf
, d
->size
, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
478 /* Read host total and free values */
479 f
= fopen_cached("/proc/meminfo", "re", &fopen_cache
);
483 while (getline(&line
, &linelen
, f
) != -1) {
484 if (startswith(line
, "SwapTotal:"))
485 sscanf(line
, "SwapTotal: %8" PRIu64
" kB", &hostswtotal
);
486 else if (startswith(line
, "SwapFree:"))
487 sscanf(line
, "SwapFree: %8" PRIu64
" kB", &hostswfree
);
491 /* For cgroups v1, the total amount of swap is always reported to be the
492 lesser of the RAM+SWAP limit or the SWAP device size.
493 This is because the kernel can swap as much as it
494 wants and not only up to swtotal. */
495 if (!liblxcfs_memory_is_cgroupv2())
496 swtotal
= memlimit
/ 1024 + swtotal
;
498 if (hostswtotal
< swtotal
) {
499 swtotal
= hostswtotal
;
502 /* When swappiness is 0, pretend we can't swap. */
503 if (memswpriority
== 0) {
509 l
= snprintf(d
->buf
+ total_len
, d
->size
- total_len
,
510 "none%*svirtual\t\t%" PRIu64
"\t%" PRIu64
"\t0\n",
511 36, " ", swtotal
, swusage
);
515 if (total_len
< 0 || l
< 0)
516 return log_error(0, "Failed writing to cache");
519 d
->size
= (int)total_len
;
521 if ((size_t)total_len
> size
)
523 memcpy(buf
, d
->buf
, total_len
);
528 static void get_blkio_io_value(char *str
, unsigned major
, unsigned minor
,
529 char *iotype
, uint64_t *v
)
536 snprintf(key
, 32, "%u:%u %s", major
, minor
, iotype
);
541 if (startswith(str
, key
)) {
542 sscanf(str
+ len
, "%" PRIu64
, v
);
545 eol
= strchr(str
, '\n');
/*
 * One row of /proc/diskstats. The number in each member's comment is the
 * 1-based position of that field within a diskstats line.
 */
struct lxcfs_diskstats {
	unsigned int major;		/*  1 - major number */
	unsigned int minor;		/*  2 - minor number */
	char dev_name[72];		/*  3 - device name */
	uint64_t read;			/*  4 - reads completed successfully */
	uint64_t read_merged;		/*  5 - reads merged */
	uint64_t read_sectors;		/*  6 - sectors read */
	uint64_t read_ticks;		/*  7 - time spent reading (ms) */
	uint64_t write;			/*  8 - writes completed */
	uint64_t write_merged;		/*  9 - writes merged */
	uint64_t write_sectors;		/* 10 - sectors written */
	uint64_t write_ticks;		/* 11 - time spent writing (ms) */
	uint64_t ios_pgr;		/* 12 - I/Os currently in progress */
	uint64_t total_ticks;		/* 13 - time spent doing I/Os (ms) */
	uint64_t rq_ticks;		/* 14 - weighted time spent doing I/Os (ms) */
	uint64_t discard;		/* 15 - discards completed successfully (4.18+) */
	uint64_t discard_merged;	/* 16 - discards merged (4.18+) */
	uint64_t discard_sectors;	/* 17 - sectors discarded (4.18+) */
	uint64_t discard_ticks;		/* 18 - time spent discarding (4.18+) */
};
573 static int proc_diskstats_read(char *buf
, size_t size
, off_t offset
,
574 struct fuse_file_info
*fi
)
576 __do_free
char *cg
= NULL
, *io_serviced_str
= NULL
,
577 *io_merged_str
= NULL
, *io_service_bytes_str
= NULL
,
578 *io_wait_time_str
= NULL
, *io_service_time_str
= NULL
,
580 __do_free
void *fopen_cache
= NULL
;
581 __do_fclose
FILE *f
= NULL
;
582 struct fuse_context
*fc
= fuse_get_context();
583 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
584 struct lxcfs_diskstats stats
= {};
586 uint64_t read_service_time
, write_service_time
, discard_service_time
, read_wait_time
,
587 write_wait_time
, discard_wait_time
;
588 char *cache
= d
->buf
;
589 size_t cache_size
= d
->buflen
;
590 size_t linelen
= 0, total_len
= 0;
597 if (offset
> d
->size
)
603 left
= d
->size
- offset
;
604 total_len
= left
> size
? size
: left
;
605 memcpy(buf
, cache
+ offset
, total_len
);
610 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
611 if (initpid
<= 1 || is_shared_pidns(initpid
))
614 cg
= get_pid_cgroup(initpid
, "blkio");
616 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
617 prune_init_slice(cg
);
619 ret
= cgroup_ops
->get_io_serviced(cgroup_ops
, cg
, &io_serviced_str
);
621 if (ret
== -EOPNOTSUPP
)
622 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
625 ret
= cgroup_ops
->get_io_merged(cgroup_ops
, cg
, &io_merged_str
);
627 if (ret
== -EOPNOTSUPP
)
628 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
631 ret
= cgroup_ops
->get_io_service_bytes(cgroup_ops
, cg
, &io_service_bytes_str
);
633 if (ret
== -EOPNOTSUPP
)
634 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
637 ret
= cgroup_ops
->get_io_wait_time(cgroup_ops
, cg
, &io_wait_time_str
);
639 if (ret
== -EOPNOTSUPP
)
640 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
643 ret
= cgroup_ops
->get_io_service_time(cgroup_ops
, cg
, &io_service_time_str
);
645 if (ret
== -EOPNOTSUPP
)
646 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
649 f
= fopen_cached("/proc/diskstats", "re", &fopen_cache
);
653 while (getline(&line
, &linelen
, f
) != -1) {
657 i
= sscanf(line
, "%u %u %71s", &stats
.major
, &stats
.minor
, stats
.dev_name
);
661 get_blkio_io_value(io_serviced_str
, stats
.major
, stats
.minor
, "Read", &stats
.read
);
662 get_blkio_io_value(io_serviced_str
, stats
.major
, stats
.minor
, "Write", &stats
.write
);
663 get_blkio_io_value(io_serviced_str
, stats
.major
, stats
.minor
, "Discard", &stats
.discard
);
665 get_blkio_io_value(io_merged_str
, stats
.major
, stats
.minor
, "Read", &stats
.read_merged
);
666 get_blkio_io_value(io_merged_str
, stats
.major
, stats
.minor
, "Write", &stats
.write_merged
);
667 get_blkio_io_value(io_merged_str
, stats
.major
, stats
.minor
, "Discard", &stats
.discard_merged
);
669 get_blkio_io_value(io_service_bytes_str
, stats
.major
, stats
.minor
, "Read", &stats
.read_sectors
);
670 stats
.read_sectors
= stats
.read_sectors
/ 512;
671 get_blkio_io_value(io_service_bytes_str
, stats
.major
, stats
.minor
, "Write", &stats
.write_sectors
);
672 stats
.write_sectors
= stats
.write_sectors
/ 512;
673 get_blkio_io_value(io_service_bytes_str
, stats
.major
, stats
.minor
, "Discard", &stats
.discard_sectors
);
674 stats
.discard_sectors
= stats
.discard_sectors
/ 512;
676 get_blkio_io_value(io_service_time_str
, stats
.major
, stats
.minor
, "Read", &read_service_time
);
677 read_service_time
= read_service_time
/ 1000000;
678 get_blkio_io_value(io_wait_time_str
, stats
.major
, stats
.minor
, "Read", &read_wait_time
);
679 read_wait_time
= read_wait_time
/ 1000000;
680 stats
.read_ticks
= read_service_time
+ read_wait_time
;
682 get_blkio_io_value(io_service_time_str
, stats
.major
, stats
.minor
, "Write", &write_service_time
);
683 write_service_time
= write_service_time
/ 1000000;
684 get_blkio_io_value(io_wait_time_str
, stats
.major
, stats
.minor
, "Write", &write_wait_time
);
685 write_wait_time
= write_wait_time
/ 1000000;
686 stats
.write_ticks
= write_service_time
+ write_wait_time
;
688 get_blkio_io_value(io_service_time_str
, stats
.major
, stats
.minor
, "Discard", &discard_service_time
);
689 discard_service_time
= discard_service_time
/ 1000000;
690 get_blkio_io_value(io_wait_time_str
, stats
.major
, stats
.minor
, "Discard", &discard_wait_time
);
691 discard_wait_time
= discard_wait_time
/ 1000000;
692 stats
.discard_ticks
= discard_service_time
+ discard_wait_time
;
694 get_blkio_io_value(io_service_time_str
, stats
.major
, stats
.minor
, "Total", &stats
.total_ticks
);
695 stats
.total_ticks
= stats
.total_ticks
/ 1000000;
697 memset(lbuf
, 0, sizeof(lbuf
));
698 if (stats
.read
|| stats
.write
|| stats
.read_merged
|| stats
.write_merged
||
699 stats
.read_sectors
|| stats
.write_sectors
|| stats
.read_ticks
||
700 stats
.write_ticks
|| stats
.ios_pgr
|| stats
.total_ticks
|| stats
.rq_ticks
|| stats
.discard
||
701 stats
.discard_merged
|| stats
.discard_sectors
|| stats
.discard_ticks
) {
705 "%u %u" /* major, minor */
707 " %" PRIu64
/* read */
708 " %" PRIu64
/* read_merged */
709 " %" PRIu64
/* read_sectors */
710 " %" PRIu64
/* read_ticks */
711 " %" PRIu64
/* write */
712 " %" PRIu64
/* write_merged */
713 " %" PRIu64
/* write_sectors */
714 " %" PRIu64
/* write_ticks */
715 " %" PRIu64
/* ios_pgr */
716 " %" PRIu64
/* total_ticks */
717 " %" PRIu64
/* rq_ticks */
718 " %" PRIu64
/* discard */
719 " %" PRIu64
/* discard_merged */
720 " %" PRIu64
/* discard_sectors */
721 " %" PRIu64
/* discard_ticks */
738 stats
.discard_merged
,
739 stats
.discard_sectors
,
740 stats
.discard_ticks
);
742 lxcfs_error("Insufficient buffer for %u:%u %s diskstats",
743 stats
.major
, stats
.minor
, stats
.dev_name
);
750 l
= snprintf(cache
, cache_size
, "%s", lbuf
);
752 return log_error(0, "Failed to write cache");
753 if ((size_t)l
>= cache_size
)
754 return log_error(0, "Write to cache was truncated");
763 if (total_len
> size
)
765 memcpy(buf
, d
->buf
, total_len
);
/*
 * Debugging aid: create an empty regular file at /tmp/lxcfs-iwashere
 * (mode 0644) so that reaching a given code path leaves a visible marker.
 * The result of mknod() is deliberately not checked.
 */
static inline void iwashere(void)
{
	mknod("/tmp/lxcfs-iwashere", S_IFREG, 0644);
}
778 * This function retrieves the busy time of a group of tasks by looking at
779 * cpuacct.usage. Unfortunately, this only makes sense when the container has
780 * been given its own cpuacct cgroup. If not, this function will take the busy
781 * time of all other tasks that do not actually belong to the container into
782 * account as well. If someone has a clever solution for this please send a
785 static double get_reaper_busy(pid_t task
)
787 __do_free
char *cgroup
= NULL
, *usage_str
= NULL
;
791 initpid
= lookup_initpid_in_store(task
);
795 cgroup
= get_pid_cgroup(initpid
, "cpuacct");
798 prune_init_slice(cgroup
);
800 if (!cgroup_ops
->get(cgroup_ops
, "cpuacct", cgroup
, "cpuacct.usage", &usage_str
))
803 if (safe_uint64(usage_str
, &usage
, 10) < 0)
804 lxcfs_error("Failed to convert usage %s", usage_str
);
806 return ((double)usage
/ 1000000000);
809 static uint64_t get_reaper_start_time(pid_t pid
)
811 __do_free
void *fopen_cache
= NULL
;
812 __do_fclose
FILE *f
= NULL
;
815 char path
[STRLITERALLEN("/proc/") + LXCFS_NUMSTRLEN64
+
816 STRLITERALLEN("/stat") + 1];
819 qpid
= lookup_initpid_in_store(pid
);
821 return ret_errno(EINVAL
);
823 ret
= snprintf(path
, sizeof(path
), "/proc/%d/stat", qpid
);
824 if (ret
< 0 || (size_t)ret
>= sizeof(path
))
825 return ret_errno(EINVAL
);
827 f
= fopen_cached(path
, "re", &fopen_cache
);
829 return ret_errno(EINVAL
);
831 /* Note that the *scanf() argument suppression requires that length
832 * modifiers such as "l" are omitted. Otherwise some compilers will yell
833 * at us. It's like telling someone you're not married and then asking
834 * if you can bring your wife to the party.
836 ret
= fscanf(f
, "%*d " /* (1) pid %d */
837 "%*s " /* (2) comm %s */
838 "%*c " /* (3) state %c */
839 "%*d " /* (4) ppid %d */
840 "%*d " /* (5) pgrp %d */
841 "%*d " /* (6) session %d */
842 "%*d " /* (7) tty_nr %d */
843 "%*d " /* (8) tpgid %d */
844 "%*u " /* (9) flags %u */
845 "%*u " /* (10) minflt %lu */
846 "%*u " /* (11) cminflt %lu */
847 "%*u " /* (12) majflt %lu */
848 "%*u " /* (13) cmajflt %lu */
849 "%*u " /* (14) utime %lu */
850 "%*u " /* (15) stime %lu */
851 "%*d " /* (16) cutime %ld */
852 "%*d " /* (17) cstime %ld */
853 "%*d " /* (18) priority %ld */
854 "%*d " /* (19) nice %ld */
855 "%*d " /* (20) num_threads %ld */
856 "%*d " /* (21) itrealvalue %ld */
857 "%" PRIu64
, /* (22) starttime %llu */
860 return ret_errno(EINVAL
);
862 return ret_set_errno(starttime
, 0);
865 static double get_reaper_start_time_in_sec(pid_t pid
)
867 uint64_t clockticks
, ticks_per_sec
;
871 clockticks
= get_reaper_start_time(pid
);
873 return log_debug(0, "Failed to retrieve start time of pid %d", pid
);
875 ret
= sysconf(_SC_CLK_TCK
);
877 return log_debug(0, "Failed to determine number of clock ticks in a second");
879 ticks_per_sec
= (uint64_t)ret
;
880 res
= (double)clockticks
/ ticks_per_sec
;
884 static double get_reaper_age(pid_t pid
)
887 double procstart
, procage
;
890 * We need to subtract the time the process has started since system
891 * boot minus the time when the system has started to get the actual
894 procstart
= get_reaper_start_time_in_sec(pid
);
898 struct timespec spec
;
900 ret
= clock_gettime(CLOCK_BOOTTIME
, &spec
);
904 uptime_ms
= (spec
.tv_sec
* 1000) + (spec
.tv_nsec
* 1e-6);
905 procage
= (uptime_ms
- (procstart
* 1000)) / 1000;
912 * We read /proc/uptime and reuse its second field.
913 * For the first field, we use the mtime for the reaper for
914 * the calling pid as returned by get_reaper_age()
916 static int proc_uptime_read(char *buf
, size_t size
, off_t offset
,
917 struct fuse_file_info
*fi
)
919 struct fuse_context
*fc
= fuse_get_context();
920 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
921 char *cache
= d
->buf
;
922 ssize_t total_len
= 0, ret
= 0;
923 double busytime
, idletime
, reaperage
;
932 if (offset
> d
->size
)
938 left
= d
->size
- offset
;
939 total_len
= left
> size
? size
: left
;
940 memcpy(buf
, cache
+ offset
, total_len
);
945 reaperage
= get_reaper_age(fc
->pid
);
947 * To understand why this is done, please read the comment to the
948 * get_reaper_busy() function.
950 idletime
= reaperage
;
951 busytime
= get_reaper_busy(fc
->pid
);
952 if (reaperage
>= busytime
)
953 idletime
= reaperage
- busytime
;
955 ret
= snprintf(d
->buf
, d
->buflen
, "%.2lf %.2lf\n", reaperage
, idletime
);
956 if (ret
< 0 || ret
>= d
->buflen
)
957 return read_file_fuse("/proc/uptime", buf
, size
, d
);
962 if ((size_t)total_len
> size
)
964 memcpy(buf
, d
->buf
, total_len
);
969 #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
970 static int proc_stat_read(char *buf
, size_t size
, off_t offset
,
971 struct fuse_file_info
*fi
)
973 __do_free
char *cg
= NULL
, *cpu_cg
= NULL
, *cpuset
= NULL
, *line
= NULL
;
974 __do_free
void *fopen_cache
= NULL
;
975 __do_free
struct cpuacct_usage
*cg_cpu_usage
= NULL
;
976 __do_fclose
FILE *f
= NULL
;
977 struct fuse_context
*fc
= fuse_get_context();
978 struct lxcfs_opts
*opts
= (struct lxcfs_opts
*)fc
->private_data
;
979 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
980 size_t linelen
= 0, total_len
= 0;
981 int curcpu
= -1; /* cpu numbering starts at 0 */
983 uint64_t user
= 0, nice
= 0, system
= 0, idle
= 0, iowait
= 0, irq
= 0,
984 softirq
= 0, steal
= 0, guest
= 0, guest_nice
= 0;
985 uint64_t user_sum
= 0, nice_sum
= 0, system_sum
= 0, idle_sum
= 0,
986 iowait_sum
= 0, irq_sum
= 0, softirq_sum
= 0, steal_sum
= 0,
987 guest_sum
= 0, guest_nice_sum
= 0;
988 char cpuall
[CPUALL_MAX_SIZE
];
989 /* reserve for cpu all */
990 char *cache
= d
->buf
+ CPUALL_MAX_SIZE
;
991 size_t cache_size
= d
->buflen
- CPUALL_MAX_SIZE
;
992 int cg_cpu_usage_size
= 0;
997 if (offset
> d
->size
)
1003 left
= d
->size
- offset
;
1004 total_len
= left
> size
? size
: left
;
1005 memcpy(buf
, d
->buf
+ offset
, total_len
);
1010 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
1011 if (initpid
<= 1 || is_shared_pidns(initpid
))
1015 * When a container runs in the host pid namespace, initpid == 1 and the cgroup will be "/";
1016 * we should return the host OS's /proc contents.
1017 * In some cases cpuacct.usage_all in "/" will be larger than /proc/stat.
1020 return read_file_fuse("/proc/stat", buf
, size
, d
);
1022 cg
= get_pid_cgroup(initpid
, "cpuset");
1024 return read_file_fuse("/proc/stat", buf
, size
, d
);
1025 prune_init_slice(cg
);
1026 cpu_cg
= get_pid_cgroup(initpid
, "cpu");
1028 return read_file_fuse("/proc/stat", buf
, size
, d
);
1029 prune_init_slice(cpu_cg
);
1030 cpuset
= get_cpuset(cg
);
1034 f
= fopen_cached("/proc/stat", "re", &fopen_cache
);
1038 /* Skip first system cpu line. */
1039 if (getline(&line
, &linelen
, f
) < 0)
1040 return log_error(0, "proc_stat_read read first line failed");
1043 * Read cpuacct.usage_all for all CPUs.
1044 * If the cpuacct cgroup is present, it is used to calculate the container's
1045 * CPU usage. If not, values from the host's /proc/stat are used.
1047 if (read_cpuacct_usage_all(cg
, cpuset
, &cg_cpu_usage
, &cg_cpu_usage_size
) == 0) {
1048 if (cgroup_ops
->can_use_cpuview(cgroup_ops
) && opts
&& opts
->use_cfs
) {
1049 total_len
= cpuview_proc_stat(cg
, cpu_cg
, cpuset
, cg_cpu_usage
,
1050 cg_cpu_usage_size
, f
,
1055 lxcfs_v("proc_stat_read failed to read from cpuacct, falling back to the host's /proc/stat");
1058 while (getline(&line
, &linelen
, f
) != -1) {
1060 char cpu_char
[10]; /* That's a lot of cores */
1062 uint64_t all_used
, cg_used
, new_idle
;
1063 int ret
, cpu_to_render
;
1065 if (strlen(line
) == 0)
1067 if (sscanf(line
, "cpu%9[^ ]", cpu_char
) != 1) {
1068 /* not a ^cpuN line containing a number N, just print it */
1069 l
= snprintf(cache
, cache_size
, "%s", line
);
1071 return log_error(0, "Failed to write cache");
1072 if ((size_t)l
>= cache_size
)
1073 return log_error(0, "Write to cache was truncated");
1082 if (sscanf(cpu_char
, "%d", &physcpu
) != 1)
1085 if (!cpu_in_cpuset(physcpu
, cpuset
))
1090 if (cgroup_ops
->can_use_cpuview(cgroup_ops
) && opts
&& opts
->use_cfs
)
1091 cpu_to_render
= curcpu
;
1093 cpu_to_render
= physcpu
;
1098 " %" PRIu64
/* user */
1099 " %" PRIu64
/* nice */
1100 " %" PRIu64
/* system */
1101 " %" PRIu64
/* idle */
1102 " %" PRIu64
/* iowait */
1103 " %" PRIu64
/* irq */
1104 " %" PRIu64
/* softirq */
1105 " %" PRIu64
/* steal */
1106 " %" PRIu64
/* guest */
1107 " %" PRIu64
, /* guest_nice */
1118 if (ret
!= 10 || !cg_cpu_usage
) {
1119 c
= strchr(line
, ' ');
1123 l
= snprintf(cache
, cache_size
, "cpu%d%s", cpu_to_render
, c
);
1125 return log_error(0, "Failed to write cache");
1126 if ((size_t)l
>= cache_size
)
1127 return log_error(0, "Write to cache was truncated");
1138 if (physcpu
>= cg_cpu_usage_size
)
1141 all_used
= user
+ nice
+ system
+ iowait
+ irq
+ softirq
+ steal
+ guest
+ guest_nice
;
1142 cg_used
= cg_cpu_usage
[physcpu
].user
+ cg_cpu_usage
[physcpu
].system
;
1144 if (all_used
>= cg_used
) {
1145 new_idle
= idle
+ (all_used
- cg_used
);
1147 lxcfs_debug("cpu%d from %s has unexpected cpu time: %" PRIu64
" in /proc/stat, %" PRIu64
" in cpuacct.usage_all; unable to determine idle time",
1148 cpu_to_render
, cg
, all_used
, cg_used
);
1152 l
= snprintf(cache
, cache_size
,
1153 "cpu%d %" PRIu64
" 0 %" PRIu64
" %" PRIu64
" 0 0 0 0 0 0\n",
1154 cpu_to_render
, cg_cpu_usage
[physcpu
].user
,
1155 cg_cpu_usage
[physcpu
].system
, new_idle
);
1157 return log_error(0, "Failed to write cache");
1158 if ((size_t)l
>= cache_size
)
1159 return log_error(0, "Write to cache was truncated");
1165 user_sum
+= cg_cpu_usage
[physcpu
].user
;
1166 system_sum
+= cg_cpu_usage
[physcpu
].system
;
1167 idle_sum
+= new_idle
;
1171 system_sum
+= system
;
1173 iowait_sum
+= iowait
;
1175 softirq_sum
+= softirq
;
1178 guest_nice_sum
+= guest_nice
;
1184 int cpuall_len
= snprintf(
1188 " %" PRIu64
/* user_sum */
1189 " %" PRIu64
/* nice_sum */
1190 " %" PRIu64
/* system_sum */
1191 " %" PRIu64
/* idle_sum */
1192 " %" PRIu64
/* iowait_sum */
1193 " %" PRIu64
/* irq_sum */
1194 " %" PRIu64
/* softirq_sum */
1195 " %" PRIu64
/* steal_sum */
1196 " %" PRIu64
/* guest_sum */
1197 " %" PRIu64
/* guest_nice_sum */
1209 if (cpuall_len
> 0 && cpuall_len
< CPUALL_MAX_SIZE
) {
1210 memcpy(cache
, cpuall
, cpuall_len
);
1211 cache
+= cpuall_len
;
1213 /* shouldn't happen */
1214 lxcfs_error("proc_stat_read copy cpuall failed, cpuall_len=%d", cpuall_len
);
1218 memmove(cache
, d
->buf
+ CPUALL_MAX_SIZE
, total_len
);
1219 total_len
+= cpuall_len
;
1223 d
->size
= total_len
;
1224 if (total_len
> size
)
1227 memcpy(buf
, d
->buf
, total_len
);
1231 /* Note that "memory.stat" in cgroup2 is hierarchical by default. */
1232 static bool cgroup_parse_memory_stat(const char *cgroup
, struct memory_stat
*mstat
)
1234 __do_close
int fd
= -EBADF
;
1235 __do_fclose
FILE *f
= NULL
;
1236 __do_free
char *line
= NULL
;
1237 __do_free
void *fdopen_cache
= NULL
;
1242 fd
= cgroup_ops
->get_memory_stats_fd(cgroup_ops
, cgroup
);
1246 f
= fdopen_cached(fd
, "re", &fdopen_cache
);
1250 unified
= pure_unified_layout(cgroup_ops
);
1251 while ((linelen
= getline(&line
, &len
, f
)) != -1) {
1252 if (!unified
&& startswith(line
, "hierarchical_memory_limit")) {
1253 sscanf(line
, "hierarchical_memory_limit %" PRIu64
, &(mstat
->hierarchical_memory_limit
));
1254 } else if (!unified
&& startswith(line
, "hierarchical_memsw_limit")) {
1255 sscanf(line
, "hierarchical_memsw_limit %" PRIu64
, &(mstat
->hierarchical_memsw_limit
));
1256 } else if (startswith(line
, unified
? "file" :"total_cache")) {
1257 sscanf(line
, unified
? "file %" PRIu64
: "total_cache %" PRIu64
, &(mstat
->total_cache
));
1258 } else if (!unified
&& startswith(line
, "total_rss")) {
1259 sscanf(line
, "total_rss %" PRIu64
, &(mstat
->total_rss
));
1260 } else if (!unified
&& startswith(line
, "total_rss_huge")) {
1261 sscanf(line
, "total_rss_huge %" PRIu64
, &(mstat
->total_rss_huge
));
1262 } else if (startswith(line
, unified
? "shmem" : "total_shmem")) {
1263 sscanf(line
, unified
? "shmem %" PRIu64
: "total_shmem %" PRIu64
, &(mstat
->total_shmem
));
1264 } else if (startswith(line
, unified
? "file_mapped" : "total_mapped_file")) {
1265 sscanf(line
, unified
? "file_mapped %" PRIu64
: "total_mapped_file %" PRIu64
, &(mstat
->total_mapped_file
));
1266 } else if (!unified
&& startswith(line
, "total_dirty")) {
1267 sscanf(line
, "total_dirty %" PRIu64
, &(mstat
->total_dirty
));
1268 } else if (!unified
&& startswith(line
, "total_writeback")) {
1269 sscanf(line
, "total_writeback %" PRIu64
, &(mstat
->total_writeback
));
1270 } else if (!unified
&& startswith(line
, "total_swap")) {
1271 sscanf(line
, "total_swap %" PRIu64
, &(mstat
->total_swap
));
1272 } else if (!unified
&& startswith(line
, "total_pgpgin")) {
1273 sscanf(line
, "total_pgpgin %" PRIu64
, &(mstat
->total_pgpgin
));
1274 } else if (!unified
&& startswith(line
, "total_pgpgout")) {
1275 sscanf(line
, "total_pgpgout %" PRIu64
, &(mstat
->total_pgpgout
));
1276 } else if (startswith(line
, unified
? "pgfault" : "total_pgfault")) {
1277 sscanf(line
, unified
? "pgfault %" PRIu64
: "total_pgfault %" PRIu64
, &(mstat
->total_pgfault
));
1278 } else if (startswith(line
, unified
? "pgmajfault" : "total_pgmajfault")) {
1279 sscanf(line
, unified
? "pgmajfault %" PRIu64
: "total_pgmajfault %" PRIu64
, &(mstat
->total_pgmajfault
));
1280 } else if (startswith(line
, unified
? "inactive_anon" : "total_inactive_anon")) {
1281 sscanf(line
, unified
? "inactive_anon %" PRIu64
: "total_inactive_anon %" PRIu64
, &(mstat
->total_inactive_anon
));
1282 } else if (startswith(line
, unified
? "active_anon" : "total_active_anon")) {
1283 sscanf(line
, unified
? "active_anon %" PRIu64
: "total_active_anon %" PRIu64
, &(mstat
->total_active_anon
));
1284 } else if (startswith(line
, unified
? "inactive_file" : "total_inactive_file")) {
1285 sscanf(line
, unified
? "inactive_file %" PRIu64
: "total_inactive_file %" PRIu64
, &(mstat
->total_inactive_file
));
1286 } else if (startswith(line
, unified
? "active_file" : "total_active_file")) {
1287 sscanf(line
, unified
? "active_file %" PRIu64
: "total_active_file %" PRIu64
, &(mstat
->total_active_file
));
1288 } else if (startswith(line
, unified
? "unevictable" : "total_unevictable")) {
1289 sscanf(line
, unified
? "unevictable %" PRIu64
: "total_unevictable %" PRIu64
, &(mstat
->total_unevictable
));
1296 static int proc_meminfo_read(char *buf
, size_t size
, off_t offset
,
1297 struct fuse_file_info
*fi
)
1299 __do_free
char *cgroup
= NULL
, *line
= NULL
, *memusage_str
= NULL
,
1300 *memswusage_str
= NULL
, *memswpriority_str
= NULL
;
1301 __do_free
void *fopen_cache
= NULL
;
1302 __do_fclose
FILE *f
= NULL
;
1303 struct fuse_context
*fc
= fuse_get_context();
1304 bool wants_swap
= lxcfs_has_opt(fuse_get_context()->private_data
, LXCFS_SWAP_ON
);
1305 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
1306 uint64_t memlimit
= 0, memusage
= 0,
1307 hosttotal
= 0, swfree
= 0, swusage
= 0, swtotal
= 0,
1309 struct memory_stat mstat
= {};
1310 size_t linelen
= 0, total_len
= 0;
1311 char *cache
= d
->buf
;
1312 size_t cache_size
= d
->buflen
;
1318 if (offset
> d
->size
)
1324 left
= d
->size
- offset
;
1325 total_len
= left
> size
? size
: left
;
1326 memcpy(buf
, cache
+ offset
, total_len
);
1331 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
1332 if (initpid
<= 1 || is_shared_pidns(initpid
))
1335 cgroup
= get_pid_cgroup(initpid
, "memory");
1337 return read_file_fuse("/proc/meminfo", buf
, size
, d
);
1339 prune_init_slice(cgroup
);
1342 ret
= cgroup_ops
->get_memory_current(cgroup_ops
, cgroup
, &memusage_str
);
1344 return read_file_fuse("/proc/meminfo", buf
, size
, d
);
1346 if (safe_uint64(memusage_str
, &memusage
, 10) < 0)
1347 lxcfs_error("Failed to convert memusage %s", memusage_str
);
1349 if (!cgroup_parse_memory_stat(cgroup
, &mstat
))
1350 return read_file_fuse("/proc/meminfo", buf
, size
, d
);
1352 ret
= get_min_memlimit(cgroup
, false, &memlimit
);
1354 return read_file_fuse("/proc/meminfo", buf
, size
, d
);
1356 * Following values are allowed to fail, because swapaccount might be
1357 * turned off for current kernel.
1360 get_swap_info(cgroup
, memlimit
, memusage
, &swtotal
, &swusage
, &memswpriority
);
1362 f
= fopen_cached("/proc/meminfo", "re", &fopen_cache
);
1364 return read_file_fuse("/proc/meminfo", buf
, size
, d
);
1368 while (getline(&line
, &linelen
, f
) != -1) {
1370 char *printme
, lbuf
[100];
1372 memset(lbuf
, 0, 100);
1373 if (startswith(line
, "MemTotal:")) {
1374 sscanf(line
+sizeof("MemTotal:")-1, "%" PRIu64
, &hosttotal
);
1376 memlimit
= hosttotal
;
1378 if (hosttotal
< memlimit
)
1379 memlimit
= hosttotal
;
1380 snprintf(lbuf
, 100, "MemTotal: %8" PRIu64
" kB\n", memlimit
);
1382 } else if (startswith(line
, "MemFree:")) {
1383 snprintf(lbuf
, 100, "MemFree: %8" PRIu64
" kB\n", memlimit
- memusage
);
1385 } else if (startswith(line
, "MemAvailable:")) {
1386 snprintf(lbuf
, 100, "MemAvailable: %8" PRIu64
" kB\n", memlimit
- memusage
+ (mstat
.total_active_file
+ mstat
.total_inactive_file
) / 1024);
1388 } else if (startswith(line
, "SwapTotal:")) {
1390 uint64_t hostswtotal
= 0;
1392 sscanf(line
+ STRLITERALLEN("SwapTotal:"), "%" PRIu64
, &hostswtotal
);
1394 /* In cgroups v1, the total amount of swap is always reported to be the
1395 lesser of the RAM+SWAP limit or the SWAP device size.
1396 This is because the kernel can swap as much as it
1397 wants and not only up to swtotal. */
1398 if (!liblxcfs_memory_is_cgroupv2())
1399 swtotal
+= memlimit
;
1401 if (hostswtotal
< swtotal
) {
1402 swtotal
= hostswtotal
;
1405 /* When swappiness is 0, pretend we can't swap. */
1406 if (memswpriority
== 0) {
1411 snprintf(lbuf
, 100, "SwapTotal: %8" PRIu64
" kB\n", swtotal
);
1413 } else if (startswith(line
, "SwapFree:")) {
1415 swfree
= swtotal
- swusage
;
1418 snprintf(lbuf
, 100, "SwapFree: %8" PRIu64
" kB\n", swfree
);
1420 } else if (startswith(line
, "Slab:")) {
1421 snprintf(lbuf
, 100, "Slab: %8" PRIu64
" kB\n", (uint64_t)0);
1423 } else if (startswith(line
, "Buffers:")) {
1424 snprintf(lbuf
, 100, "Buffers: %8" PRIu64
" kB\n", (uint64_t)0);
1426 } else if (startswith(line
, "Cached:")) {
1427 snprintf(lbuf
, 100, "Cached: %8" PRIu64
" kB\n",
1428 mstat
.total_cache
/ 1024);
1430 } else if (startswith(line
, "SwapCached:")) {
1431 snprintf(lbuf
, 100, "SwapCached: %8" PRIu64
" kB\n", (uint64_t)0);
1433 } else if (startswith(line
, "Active:")) {
1434 snprintf(lbuf
, 100, "Active: %8" PRIu64
" kB\n",
1435 (mstat
.total_active_anon
+
1436 mstat
.total_active_file
) /
1439 } else if (startswith(line
, "Inactive:")) {
1440 snprintf(lbuf
, 100, "Inactive: %8" PRIu64
" kB\n",
1441 (mstat
.total_inactive_anon
+
1442 mstat
.total_inactive_file
) /
1445 } else if (startswith(line
, "Active(anon):")) {
1446 snprintf(lbuf
, 100, "Active(anon): %8" PRIu64
" kB\n",
1447 mstat
.total_active_anon
/ 1024);
1449 } else if (startswith(line
, "Inactive(anon):")) {
1450 snprintf(lbuf
, 100, "Inactive(anon): %8" PRIu64
" kB\n",
1451 mstat
.total_inactive_anon
/ 1024);
1453 } else if (startswith(line
, "Active(file):")) {
1454 snprintf(lbuf
, 100, "Active(file): %8" PRIu64
" kB\n",
1455 mstat
.total_active_file
/ 1024);
1457 } else if (startswith(line
, "Inactive(file):")) {
1458 snprintf(lbuf
, 100, "Inactive(file): %8" PRIu64
" kB\n",
1459 mstat
.total_inactive_file
/ 1024);
1461 } else if (startswith(line
, "Unevictable:")) {
1462 snprintf(lbuf
, 100, "Unevictable: %8" PRIu64
" kB\n",
1463 mstat
.total_unevictable
/ 1024);
1465 } else if (startswith(line
, "Dirty:")) {
1466 snprintf(lbuf
, 100, "Dirty: %8" PRIu64
" kB\n",
1467 mstat
.total_dirty
/ 1024);
1469 } else if (startswith(line
, "Writeback:")) {
1470 snprintf(lbuf
, 100, "Writeback: %8" PRIu64
" kB\n",
1471 mstat
.total_writeback
/ 1024);
1473 } else if (startswith(line
, "AnonPages:")) {
1474 snprintf(lbuf
, 100, "AnonPages: %8" PRIu64
" kB\n",
1475 (mstat
.total_active_anon
+
1476 mstat
.total_inactive_anon
- mstat
.total_shmem
) /
1479 } else if (startswith(line
, "Mapped:")) {
1480 snprintf(lbuf
, 100, "Mapped: %8" PRIu64
" kB\n",
1481 mstat
.total_mapped_file
/ 1024);
1483 } else if (startswith(line
, "SReclaimable:")) {
1484 snprintf(lbuf
, 100, "SReclaimable: %8" PRIu64
" kB\n", (uint64_t)0);
1486 } else if (startswith(line
, "SUnreclaim:")) {
1487 snprintf(lbuf
, 100, "SUnreclaim: %8" PRIu64
" kB\n", (uint64_t)0);
1489 } else if (startswith(line
, "Shmem:")) {
1490 snprintf(lbuf
, 100, "Shmem: %8" PRIu64
" kB\n",
1491 mstat
.total_shmem
/ 1024);
1493 } else if (startswith(line
, "ShmemHugePages:")) {
1494 snprintf(lbuf
, 100, "ShmemHugePages: %8" PRIu64
" kB\n", (uint64_t)0);
1496 } else if (startswith(line
, "ShmemPmdMapped:")) {
1497 snprintf(lbuf
, 100, "ShmemPmdMapped: %8" PRIu64
" kB\n", (uint64_t)0);
1499 } else if (startswith(line
, "AnonHugePages:")) {
1500 snprintf(lbuf
, 100, "AnonHugePages: %8" PRIu64
" kB\n",
1501 mstat
.total_rss_huge
/ 1024);
1507 l
= snprintf(cache
, cache_size
, "%s", printme
);
1509 return log_error(0, "Failed to write cache");
1510 if ((size_t)l
>= cache_size
)
1511 return log_error(0, "Write to cache was truncated");
1519 d
->size
= total_len
;
1520 if (total_len
> size
)
1522 memcpy(buf
, d
->buf
, total_len
);
1527 static int proc_slabinfo_read(char *buf
, size_t size
, off_t offset
,
1528 struct fuse_file_info
*fi
)
1530 __do_free
char *cgroup
= NULL
, *line
= NULL
;
1531 __do_free
void *fopen_cache
= NULL
;
1532 __do_fclose
FILE *f
= NULL
;
1533 __do_close
int fd
= -EBADF
;
1534 struct fuse_context
*fc
= fuse_get_context();
1535 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
1536 size_t linelen
= 0, total_len
= 0;
1537 char *cache
= d
->buf
;
1538 size_t cache_size
= d
->buflen
;
1544 if (offset
> d
->size
)
1550 left
= d
->size
- offset
;
1551 total_len
= left
> size
? size
: left
;
1552 memcpy(buf
, cache
+ offset
, total_len
);
1557 initpid
= lookup_initpid_in_store(fc
->pid
);
1558 if (initpid
<= 1 || is_shared_pidns(initpid
))
1561 cgroup
= get_pid_cgroup(initpid
, "memory");
1563 return read_file_fuse("/proc/slabinfo", buf
, size
, d
);
1565 prune_init_slice(cgroup
);
1567 fd
= cgroup_ops
->get_memory_slabinfo_fd(cgroup_ops
, cgroup
);
1569 return read_file_fuse("/proc/slabinfo", buf
, size
, d
);
1571 f
= fdopen_cached(fd
, "re", &fopen_cache
);
1573 return read_file_fuse("/proc/slabinfo", buf
, size
, d
);
1575 while (getline(&line
, &linelen
, f
) != -1) {
1576 ssize_t l
= snprintf(cache
, cache_size
, "%s", line
);
1578 return log_error(0, "Failed to write cache");
1579 if ((size_t)l
>= cache_size
)
1580 return log_error(0, "Write to cache was truncated");
1588 d
->size
= total_len
;
1589 if (total_len
> size
)
1591 memcpy(buf
, d
->buf
, total_len
);
1596 static int proc_read_with_personality(int (*do_proc_read
)(char *, size_t, off_t
,
1597 struct fuse_file_info
*), char *buf
, size_t size
, off_t offset
,
1598 struct fuse_file_info
*fi
)
1600 struct fuse_context
*fc
= fuse_get_context();
1601 __u32 host_personality
= liblxcfs_personality(), caller_personality
;
1602 bool change_personality
;
1605 if (get_task_personality(fc
->pid
, &caller_personality
) < 0)
1606 return log_error(0, "Failed to get caller process (pid: %d) personality", fc
->pid
);
1608 /* do we need to change thread personality? */
1609 change_personality
= host_personality
!= caller_personality
;
1611 if (change_personality
) {
1612 ret
= personality(caller_personality
);
1614 return log_error(0, "Call to personality(%d) failed: %s\n",
1615 caller_personality
, strerror(errno
));
1617 lxcfs_debug("task (tid: %d) personality was changed %d -> %d\n",
1618 (int)syscall(SYS_gettid
), ret
, caller_personality
);
1621 read_ret
= do_proc_read(buf
, size
, offset
, fi
);
1623 if (change_personality
) {
1624 ret
= personality(host_personality
);
1626 return log_error(0, "Call to personality(%d) failed: %s\n",
1627 host_personality
, strerror(errno
));
1629 lxcfs_debug("task (tid: %d) personality was restored %d -> %d\n",
1630 (int)syscall(SYS_gettid
), ret
, host_personality
);
1636 __lxcfs_fuse_ops
int proc_read(const char *path
, char *buf
, size_t size
,
1637 off_t offset
, struct fuse_file_info
*fi
)
1639 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
1642 case LXC_TYPE_PROC_MEMINFO
:
1643 if (liblxcfs_functional())
1644 return proc_meminfo_read(buf
, size
, offset
, fi
);
1646 return read_file_fuse_with_offset(LXC_TYPE_PROC_MEMINFO_PATH
,
1647 buf
, size
, offset
, f
);
1648 case LXC_TYPE_PROC_CPUINFO
:
1649 if (liblxcfs_functional())
1650 return proc_read_with_personality(&proc_cpuinfo_read
, buf
, size
, offset
, fi
);
1652 return read_file_fuse_with_offset(LXC_TYPE_PROC_CPUINFO_PATH
,
1653 buf
, size
, offset
, f
);
1654 case LXC_TYPE_PROC_UPTIME
:
1655 if (liblxcfs_functional())
1656 return proc_uptime_read(buf
, size
, offset
, fi
);
1658 return read_file_fuse_with_offset(LXC_TYPE_PROC_UPTIME_PATH
,
1659 buf
, size
, offset
, f
);
1660 case LXC_TYPE_PROC_STAT
:
1661 if (liblxcfs_functional())
1662 return proc_stat_read(buf
, size
, offset
, fi
);
1664 return read_file_fuse_with_offset(LXC_TYPE_PROC_STAT_PATH
, buf
,
1666 case LXC_TYPE_PROC_DISKSTATS
:
1667 if (liblxcfs_functional())
1668 return proc_diskstats_read(buf
, size
, offset
, fi
);
1670 return read_file_fuse_with_offset(LXC_TYPE_PROC_DISKSTATS_PATH
,
1671 buf
, size
, offset
, f
);
1672 case LXC_TYPE_PROC_SWAPS
:
1673 if (liblxcfs_functional())
1674 return proc_swaps_read(buf
, size
, offset
, fi
);
1676 return read_file_fuse_with_offset(LXC_TYPE_PROC_SWAPS_PATH
, buf
,
1678 case LXC_TYPE_PROC_LOADAVG
:
1679 if (liblxcfs_functional())
1680 return proc_loadavg_read(buf
, size
, offset
, fi
);
1682 return read_file_fuse_with_offset(LXC_TYPE_PROC_LOADAVG_PATH
,
1683 buf
, size
, offset
, f
);
1684 case LXC_TYPE_PROC_SLABINFO
:
1685 if (liblxcfs_functional())
1686 return proc_slabinfo_read(buf
, size
, offset
, fi
);
1688 return read_file_fuse_with_offset(LXC_TYPE_PROC_SLABINFO_PATH
,
1689 buf
, size
, offset
, f
);