1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
7 #ifndef FUSE_USE_VERSION
8 #define FUSE_USE_VERSION 26
11 #define _FILE_OFFSET_BITS 64
13 #define __STDC_FORMAT_MACROS
31 #include <linux/magic.h>
32 #include <linux/sched.h>
33 #include <sys/epoll.h>
35 #include <sys/mount.h>
36 #include <sys/param.h>
37 #include <sys/socket.h>
38 #include <sys/syscall.h>
39 #include <sys/sysinfo.h>
44 #include "cgroup_fuse.h"
45 #include "cgroups/cgroup.h"
46 #include "cgroups/cgroup_utils.h"
47 #include "cpuset_parse.h"
48 #include "memory_utils.h"
49 #include "proc_loadavg.h"
50 #include "proc_cpuview.h"
54 uint64_t hierarchical_memory_limit
;
55 uint64_t hierarchical_memsw_limit
;
58 uint64_t total_rss_huge
;
60 uint64_t total_mapped_file
;
62 uint64_t total_writeback
;
64 uint64_t total_pgpgin
;
65 uint64_t total_pgpgout
;
66 uint64_t total_pgfault
;
67 uint64_t total_pgmajfault
;
68 uint64_t total_inactive_anon
;
69 uint64_t total_active_anon
;
70 uint64_t total_inactive_file
;
71 uint64_t total_active_file
;
72 uint64_t total_unevictable
;
/*
 * proc_getattr - fill in *sb for a path of the emulated /proc tree.
 *
 * Zeroes *sb, sets uid and gid to 0 and stamps atime, mtime and ctime with
 * the current CLOCK_REALTIME value. "/proc" is reported as a directory
 * (S_IFDIR | 00555); meminfo, cpuinfo, uptime, stat, diskstats, swaps and
 * loadavg are reported as regular read-only files (S_IFREG | 00444).
 *
 * NOTE(review): the return statements, including the one taken when
 * clock_gettime() fails, are not among the visible statements - confirm the
 * error codes against the complete function.
 */
75 int proc_getattr(const char *path
, struct stat
*sb
)
79 memset(sb
, 0, sizeof(struct stat
));
80 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
83 sb
->st_uid
= sb
->st_gid
= 0;
84 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
85 if (strcmp(path
, "/proc") == 0) {
86 sb
->st_mode
= S_IFDIR
| 00555;
91 if (strcmp(path
, "/proc/meminfo") == 0 ||
92 strcmp(path
, "/proc/cpuinfo") == 0 ||
93 strcmp(path
, "/proc/uptime") == 0 ||
94 strcmp(path
, "/proc/stat") == 0 ||
95 strcmp(path
, "/proc/diskstats") == 0 ||
96 strcmp(path
, "/proc/swaps") == 0 ||
97 strcmp(path
, "/proc/loadavg") == 0) {
99 sb
->st_mode
= S_IFREG
| 00444;
/*
 * proc_readdir - list the entries of the emulated /proc directory.
 *
 * Passes ".", ".." and the seven emulated files (cpuinfo, meminfo, stat,
 * uptime, diskstats, swaps, loadavg) to the filler callback. The calls are
 * chained with ||, so evaluation stops at the first filler() call that
 * returns non-zero.
 *
 * NOTE(review): the values returned on success and on filler failure are not
 * visible here; path, offset and fi are not referenced by the visible
 * statements.
 */
107 int proc_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
,
108 off_t offset
, struct fuse_file_info
*fi
)
110 if (filler(buf
, ".", NULL
, 0) != 0 ||
111 filler(buf
, "..", NULL
, 0) != 0 ||
112 filler(buf
, "cpuinfo", NULL
, 0) != 0 ||
113 filler(buf
, "meminfo", NULL
, 0) != 0 ||
114 filler(buf
, "stat", NULL
, 0) != 0 ||
115 filler(buf
, "uptime", NULL
, 0) != 0 ||
116 filler(buf
, "diskstats", NULL
, 0) != 0 ||
117 filler(buf
, "swaps", NULL
, 0) != 0 ||
118 filler(buf
, "loadavg", NULL
, 0) != 0)
/*
 * get_procfile_size - presumably the size in bytes of the file at path.
 *
 * Opens path with fopen(path, "re") and consumes it line by line with
 * getline().
 *
 * NOTE(review): the loop body and the return statement are not visible;
 * `answer` (initialised to 0) looks like the accumulator of the getline()
 * lengths - confirm. Handling of a failed fopen() is not visible either.
 */
124 static off_t
get_procfile_size(const char *path
)
126 __do_fclose
FILE *f
= NULL
;
127 __do_free
char *line
= NULL
;
129 ssize_t sz
, answer
= 0;
131 f
= fopen(path
, "re");
135 while ((sz
= getline(&line
, &len
, f
)) != -1)
/*
 * proc_open - set up the per-open state for one of the emulated /proc files.
 *
 * Maps path to the matching LXC_TYPE_PROC_* value, allocates a zeroed
 * struct file_info and a zeroed cache buffer of
 * get_procfile_size(path) + BUF_RESERVE_SIZE bytes, sets info->size to the
 * buffer length and stores the struct pointer in fi->fh.
 *
 * NOTE(review): move_ptr() presumably disarms the __do_free cleanup so the
 * struct outlives this function - confirm against the macro definitions.
 * Handling of unknown paths and of malloc() failures, and the return values,
 * are not visible here.
 */
141 int proc_open(const char *path
, struct fuse_file_info
*fi
)
143 __do_free
struct file_info
*info
= NULL
;
146 if (strcmp(path
, "/proc/meminfo") == 0)
147 type
= LXC_TYPE_PROC_MEMINFO
;
148 else if (strcmp(path
, "/proc/cpuinfo") == 0)
149 type
= LXC_TYPE_PROC_CPUINFO
;
150 else if (strcmp(path
, "/proc/uptime") == 0)
151 type
= LXC_TYPE_PROC_UPTIME
;
152 else if (strcmp(path
, "/proc/stat") == 0)
153 type
= LXC_TYPE_PROC_STAT
;
154 else if (strcmp(path
, "/proc/diskstats") == 0)
155 type
= LXC_TYPE_PROC_DISKSTATS
;
156 else if (strcmp(path
, "/proc/swaps") == 0)
157 type
= LXC_TYPE_PROC_SWAPS
;
158 else if (strcmp(path
, "/proc/loadavg") == 0)
159 type
= LXC_TYPE_PROC_LOADAVG
;
163 info
= malloc(sizeof(*info
));
167 memset(info
, 0, sizeof(*info
));
170 info
->buflen
= get_procfile_size(path
) + BUF_RESERVE_SIZE
;
172 info
->buf
= malloc(info
->buflen
);
176 memset(info
->buf
, 0, info
->buflen
);
177 /* set actual size to buffer size */
178 info
->size
= info
->buflen
;
180 fi
->fh
= PTR_TO_UINT64(move_ptr(info
));
/*
 * proc_access - access check for the emulated /proc tree.
 *
 * "/proc" itself is probed with access(path, R_OK). For the other paths the
 * mask is tested for any bit besides R_OK, because the emulated files are
 * read-only.
 *
 * NOTE(review): the value returned by each branch is not visible here.
 */
184 int proc_access(const char *path
, int mask
)
186 if (strcmp(path
, "/proc") == 0 && access(path
, R_OK
) == 0)
189 /* these are all read-only */
190 if ((mask
& ~R_OK
) != 0)
/*
 * proc_release - drop the per-open state attached to fi by handing it to
 * do_release_file_info(). path is not referenced by the visible statement;
 * the return value is not visible here.
 */
196 int proc_release(const char *path
, struct fuse_file_info
*fi
)
198 do_release_file_info(fi
);
/*
 * get_memlimit - read one memory limit of a cgroup as an unsigned long.
 *
 * Queries cgroup_ops->get_memory_swap_max() or cgroup_ops->get_memory_max()
 * and converts the returned string with strtoul(..., 10). memlimit starts
 * out as (unsigned long)-1.
 *
 * NOTE(review): the condition choosing between the two getters (presumably
 * `swap`) and the check of `ret` are not visible; -1 is presumably what the
 * caller receives when the lookup fails - confirm.
 */
202 static unsigned long get_memlimit(const char *cgroup
, bool swap
)
204 __do_free
char *memlimit_str
= NULL
;
205 unsigned long memlimit
= -1;
209 ret
= cgroup_ops
->get_memory_swap_max(cgroup_ops
, cgroup
, &memlimit_str
);
211 ret
= cgroup_ops
->get_memory_max(cgroup_ops
, cgroup
, &memlimit_str
);
213 memlimit
= strtoul(memlimit_str
, NULL
, 10);
/*
 * get_min_memlimit - lowest memory limit seen for a cgroup and the cgroups
 * visited while looping until the working copy of the path equals "/".
 *
 * Works on a strdup() copy of cgroup. The result starts as
 * get_memlimit(copy, swap); inside the loop a limit is compared against the
 * result when it is not -1 and lower than it. When strdup() fails the
 * function returns through log_error_errno(0, ENOMEM, ...).
 *
 * NOTE(review): how `it` is derived from the copy on every iteration, the
 * assignment performed when the comparison succeeds and the final return are
 * not visible here.
 */
218 static unsigned long get_min_memlimit(const char *cgroup
, bool swap
)
220 __do_free
char *copy
= NULL
;
221 unsigned long memlimit
= 0;
222 unsigned long retlimit
;
224 copy
= strdup(cgroup
);
226 return log_error_errno(0, ENOMEM
, "Failed to allocate memory");
228 retlimit
= get_memlimit(copy
, swap
);
230 while (strcmp(copy
, "/") != 0) {
234 memlimit
= get_memlimit(it
, swap
);
235 if (memlimit
!= -1 && memlimit
< retlimit
)
/*
 * startswith - true when the first strlen(pref) bytes of line compare equal
 * to pref (strncmp() == 0). An empty pref therefore always matches.
 */
242 static inline bool startswith(const char *line
, const char *pref
)
244 return strncmp(line
, pref
, strlen(pref
)) == 0;
/*
 * proc_swaps_read - produce the container view of /proc/swaps.
 *
 * Visible flow:
 *  - a request whose offset lies inside the cached data is answered with a
 *    memcpy() from d->buf;
 *  - otherwise the init pid of the caller is looked up, its "memory" cgroup
 *    is resolved (read_file_fuse("/proc/swaps", ...) is the visible
 *    fallback) and the memory / memory+swap limits and usages are read
 *    through cgroup_ops and get_min_memlimit();
 *  - swap_total is (memswlimit - memlimit) / 1024 and swap_free is
 *    (memswusage - memusage) / 1024;
 *  - when no mem + swap limit is available the SwapTotal and SwapFree values
 *    of /proc/meminfo are parsed instead;
 *  - the header line and, for swap_total > 0, one "none ... virtual" line
 *    are formatted into d->buf, d->size is updated and the text is copied
 *    to buf.
 *
 * NOTE(review): swap_free is printed in the "Used" column. In the cgroup
 * path it holds a usage figure, in the /proc/meminfo fallback it holds
 * SwapFree - confirm which meaning is intended.
 * NOTE(review): both subtractions are done on unsigned long; no guard against
 * the second operand being the larger one is visible.
 * NOTE(review): the conditions guarding the individual steps and the return
 * values are not visible here.
 */
247 static int proc_swaps_read(char *buf
, size_t size
, off_t offset
,
248 struct fuse_file_info
*fi
)
250 __do_free
char *cg
= NULL
, *memswlimit_str
= NULL
, *memusage_str
= NULL
,
251 *memswusage_str
= NULL
;
252 struct fuse_context
*fc
= fuse_get_context();
253 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
254 unsigned long memswlimit
= 0, memlimit
= 0, memusage
= 0,
255 memswusage
= 0, swap_total
= 0, swap_free
= 0;
256 ssize_t total_len
= 0;
258 char *cache
= d
->buf
;
264 if (offset
> d
->size
)
270 left
= d
->size
- offset
;
271 total_len
= left
> size
? size
: left
;
272 memcpy(buf
, cache
+ offset
, total_len
);
277 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
278 if (initpid
<= 1 || is_shared_pidns(initpid
))
281 cg
= get_pid_cgroup(initpid
, "memory");
283 return read_file_fuse("/proc/swaps", buf
, size
, d
);
284 prune_init_slice(cg
);
286 memlimit
= get_min_memlimit(cg
, false);
288 ret
= cgroup_ops
->get_memory_current(cgroup_ops
, cg
, &memusage_str
);
292 memusage
= strtoul(memusage_str
, NULL
, 10);
294 ret
= cgroup_ops
->get_memory_swap_max(cgroup_ops
, cg
, &memswlimit_str
);
296 ret
= cgroup_ops
->get_memory_swap_current(cgroup_ops
, cg
, &memswusage_str
);
298 memswlimit
= get_min_memlimit(cg
, true);
299 memswusage
= strtoul(memswusage_str
, NULL
, 10);
300 swap_total
= (memswlimit
- memlimit
) / 1024;
301 swap_free
= (memswusage
- memusage
) / 1024;
304 total_len
= snprintf(d
->buf
, d
->size
, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
306 /* When no mem + swap limit is specified or swapaccount=0*/
308 __do_free
char *line
= NULL
;
309 __do_free
void *fopen_cache
= NULL
;
310 __do_fclose
FILE *f
= NULL
;
313 f
= fopen_cached("/proc/meminfo", "re", &fopen_cache
);
317 while (getline(&line
, &linelen
, f
) != -1) {
318 if (startswith(line
, "SwapTotal:"))
319 sscanf(line
, "SwapTotal: %8lu kB", &swap_total
);
320 else if (startswith(line
, "SwapFree:"))
321 sscanf(line
, "SwapFree: %8lu kB", &swap_free
);
325 if (swap_total
> 0) {
326 l
= snprintf(d
->buf
+ total_len
, d
->size
- total_len
,
327 "none%*svirtual\t\t%lu\t%lu\t0\n", 36, " ",
328 swap_total
, swap_free
);
332 if (total_len
< 0 || l
< 0)
333 return log_error(0, "Failed writing to cache");
336 d
->size
= (int)total_len
;
338 if (total_len
> size
)
340 memcpy(buf
, d
->buf
, total_len
);
/*
 * get_blkio_io_value - look up one counter in a blkio statistics blob.
 *
 * Builds the key "<major>:<minor> <iotype>" (at most 32 bytes including the
 * terminator) and, where str starts with that key, parses the unsigned long
 * that follows the key into *v. strchr(str, '\n') locates the end of the
 * current line.
 *
 * NOTE(review): the loop that advances str from line to line and the value
 * left in *v when the key is never found are not visible here.
 */
345 static void get_blkio_io_value(char *str
, unsigned major
, unsigned minor
,
346 char *iotype
, unsigned long *v
)
353 snprintf(key
, 32, "%u:%u %s", major
, minor
, iotype
);
358 if (startswith(str
, key
)) {
359 sscanf(str
+ len
, "%lu", v
);
362 eol
= strchr(str
, '\n');
/*
 * proc_diskstats_read - produce the container view of /proc/diskstats.
 *
 * Visible flow:
 *  - a request whose offset lies inside the cached data is answered with a
 *    memcpy() from d->buf;
 *  - otherwise the "blkio" cgroup of the caller's init pid is resolved and
 *    the io_serviced, io_merged, io_service_bytes, io_wait_time and
 *    io_service_time blobs are fetched through cgroup_ops; a result of
 *    -EOPNOTSUPP falls back to read_file_fuse("/proc/diskstats", ...);
 *  - for every line of the host /proc/diskstats the major, minor and device
 *    name are parsed and the Read/Write/Total counters of that device are
 *    looked up with get_blkio_io_value(); byte counts are divided by 512 and
 *    the time values by 1000000;
 *  - a line is formatted into lbuf only when at least one of the read/write
 *    counters is non-zero, then appended to the cache and finally copied to
 *    buf (clamped to size).
 *
 * NOTE(review): ios_pgr and rq_ticks are initialised to 0 and printed, but no
 * assignment to them is visible.
 * NOTE(review): the counters are not visibly reset between devices - confirm
 * whether get_blkio_io_value() always stores a value.
 */
369 static int proc_diskstats_read(char *buf
, size_t size
, off_t offset
,
370 struct fuse_file_info
*fi
)
372 __do_free
char *cg
= NULL
, *io_serviced_str
= NULL
,
373 *io_merged_str
= NULL
, *io_service_bytes_str
= NULL
,
374 *io_wait_time_str
= NULL
, *io_service_time_str
= NULL
,
376 __do_free
void *fopen_cache
= NULL
;
377 __do_fclose
FILE *f
= NULL
;
378 struct fuse_context
*fc
= fuse_get_context();
379 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
380 unsigned long read
= 0, write
= 0;
381 unsigned long read_merged
= 0, write_merged
= 0;
382 unsigned long read_sectors
= 0, write_sectors
= 0;
383 unsigned long read_ticks
= 0, write_ticks
= 0;
384 unsigned long ios_pgr
= 0, tot_ticks
= 0, rq_ticks
= 0;
385 unsigned long rd_svctm
= 0, wr_svctm
= 0, rd_wait
= 0, wr_wait
= 0;
386 char *cache
= d
->buf
;
387 size_t cache_size
= d
->buflen
;
388 size_t linelen
= 0, total_len
= 0;
389 unsigned int major
= 0, minor
= 0;
397 if (offset
> d
->size
)
403 left
= d
->size
- offset
;
404 total_len
= left
> size
? size
: left
;
405 memcpy(buf
, cache
+ offset
, total_len
);
410 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
411 if (initpid
<= 1 || is_shared_pidns(initpid
))
414 cg
= get_pid_cgroup(initpid
, "blkio");
416 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
417 prune_init_slice(cg
);
419 ret
= cgroup_ops
->get_io_serviced(cgroup_ops
, cg
, &io_serviced_str
);
421 if (ret
== -EOPNOTSUPP
)
422 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
425 ret
= cgroup_ops
->get_io_merged(cgroup_ops
, cg
, &io_merged_str
);
427 if (ret
== -EOPNOTSUPP
)
428 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
431 ret
= cgroup_ops
->get_io_service_bytes(cgroup_ops
, cg
, &io_service_bytes_str
);
433 if (ret
== -EOPNOTSUPP
)
434 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
437 ret
= cgroup_ops
->get_io_wait_time(cgroup_ops
, cg
, &io_wait_time_str
);
439 if (ret
== -EOPNOTSUPP
)
440 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
443 ret
= cgroup_ops
->get_io_service_time(cgroup_ops
, cg
, &io_service_time_str
);
445 if (ret
== -EOPNOTSUPP
)
446 return read_file_fuse("/proc/diskstats", buf
, size
, d
);
449 f
= fopen_cached("/proc/diskstats", "re", &fopen_cache
);
453 while (getline(&line
, &linelen
, f
) != -1) {
457 i
= sscanf(line
, "%u %u %71s", &major
, &minor
, dev_name
);
461 get_blkio_io_value(io_serviced_str
, major
, minor
, "Read", &read
);
462 get_blkio_io_value(io_serviced_str
, major
, minor
, "Write", &write
);
463 get_blkio_io_value(io_merged_str
, major
, minor
, "Read", &read_merged
);
464 get_blkio_io_value(io_merged_str
, major
, minor
, "Write", &write_merged
);
465 get_blkio_io_value(io_service_bytes_str
, major
, minor
, "Read", &read_sectors
);
466 read_sectors
= read_sectors
/512;
467 get_blkio_io_value(io_service_bytes_str
, major
, minor
, "Write", &write_sectors
);
468 write_sectors
= write_sectors
/512;
470 get_blkio_io_value(io_service_time_str
, major
, minor
, "Read", &rd_svctm
);
471 rd_svctm
= rd_svctm
/1000000;
472 get_blkio_io_value(io_wait_time_str
, major
, minor
, "Read", &rd_wait
);
473 rd_wait
= rd_wait
/1000000;
474 read_ticks
= rd_svctm
+ rd_wait
;
476 get_blkio_io_value(io_service_time_str
, major
, minor
, "Write", &wr_svctm
);
477 wr_svctm
= wr_svctm
/1000000;
478 get_blkio_io_value(io_wait_time_str
, major
, minor
, "Write", &wr_wait
);
479 wr_wait
= wr_wait
/1000000;
480 write_ticks
= wr_svctm
+ wr_wait
;
482 get_blkio_io_value(io_service_time_str
, major
, minor
, "Total", &tot_ticks
);
483 tot_ticks
= tot_ticks
/1000000;
485 memset(lbuf
, 0, 256);
486 if (read
|| write
|| read_merged
|| write_merged
|| read_sectors
|| write_sectors
|| read_ticks
|| write_ticks
)
487 snprintf(lbuf
, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
488 major
, minor
, dev_name
, read
, read_merged
, read_sectors
, read_ticks
,
489 write
, write_merged
, write_sectors
, write_ticks
, ios_pgr
, tot_ticks
, rq_ticks
);
493 l
= snprintf(cache
, cache_size
, "%s", lbuf
);
495 return log_error(0, "Failed to write cache");
497 return log_error(0, "Write to cache was truncated");
506 if (total_len
> size
) total_len
= size
;
507 memcpy(buf
, d
->buf
, total_len
);
/*
 * iwashere - debugging aid: leaves the marker file /tmp/lxcfs-iwashere behind
 * by calling mknod(). The result of mknod() is not checked.
 */
513 static inline void iwashere(void)
515 mknod("/tmp/lxcfs-iwashere", S_IFREG
, 0644);
519 /* This function retrieves the busy time of a group of tasks by looking at
520 * cpuacct.usage. Unfortunately, this only makes sense when the container has
521 * been given its own cpuacct cgroup. If not, this function will take the busy
522 * time of all other tasks that do not actually belong to the container into
523 * account as well. If someone has a clever solution for this please send a
/*
 * get_reaper_busy - busy time of the cgroup that contains the init process
 * belonging to task.
 *
 * Looks up the init pid for task, resolves its "cpuacct" cgroup, reads
 * "cpuacct.usage" through cgroup_ops->get() and returns the value divided by
 * 1000000000 as a double.
 *
 * NOTE(review): the division suggests cpuacct.usage is in nanoseconds and
 * the result in seconds - confirm against the kernel documentation. The
 * values returned on the failure paths are not visible here.
 */
526 static double get_reaper_busy(pid_t task
)
528 __do_free
char *cgroup
= NULL
, *usage_str
= NULL
;
529 unsigned long usage
= 0;
532 initpid
= lookup_initpid_in_store(task
);
536 cgroup
= get_pid_cgroup(initpid
, "cpuacct");
539 prune_init_slice(cgroup
);
540 if (!cgroup_ops
->get(cgroup_ops
, "cpuacct", cgroup
, "cpuacct.usage",
544 usage
= strtoul(usage_str
, NULL
, 10);
545 return ((double)usage
/ 1000000000);
/*
 * get_reaper_start_time - start time (field 22 of /proc/<pid>/stat) of the
 * init process that belongs to pid.
 *
 * The init pid is looked up with lookup_initpid_in_store(), its stat file is
 * opened and the first 21 fields are skipped with suppressed conversions so
 * that only the starttime field is stored. Failures are reported through
 * ret_set_errno(0, EINVAL), success through ret_set_errno(starttime, 0),
 * which lets callers tell a real 0 from an error by checking errno.
 */
548 static uint64_t get_reaper_start_time(pid_t pid
)
550 __do_free
void *fopen_cache
= NULL
;
551 __do_fclose
FILE *f
= NULL
;
554 /* strlen("/proc/") = 6
558 * strlen("/stat") = 5
562 #define __PROC_PID_STAT_LEN (6 + LXCFS_NUMSTRLEN64 + 5 + 1)
563 char path
[__PROC_PID_STAT_LEN
];
566 qpid
= lookup_initpid_in_store(pid
);
568 /* Caller can check for EINVAL on 0. */
573 ret
= snprintf(path
, __PROC_PID_STAT_LEN
, "/proc/%d/stat", qpid
);
574 if (ret
< 0 || ret
>= __PROC_PID_STAT_LEN
) {
575 /* Caller can check for EINVAL on 0. */
580 f
= fopen_cached(path
, "re", &fopen_cache
);
582 /* Caller can check for EINVAL on 0. */
587 /* Note that the *scanf() argument suppression requires that length
588 * modifiers such as "l" are omitted. Otherwise some compilers will yell
589 * at us. It's like telling someone you're not married and then asking
590 * if you can bring your wife to the party.
592 ret
= fscanf(f
, "%*d " /* (1) pid %d */
593 "%*s " /* (2) comm %s */
594 "%*c " /* (3) state %c */
595 "%*d " /* (4) ppid %d */
596 "%*d " /* (5) pgrp %d */
597 "%*d " /* (6) session %d */
598 "%*d " /* (7) tty_nr %d */
599 "%*d " /* (8) tpgid %d */
600 "%*u " /* (9) flags %u */
601 "%*u " /* (10) minflt %lu */
602 "%*u " /* (11) cminflt %lu */
603 "%*u " /* (12) majflt %lu */
604 "%*u " /* (13) cmajflt %lu */
605 "%*u " /* (14) utime %lu */
606 "%*u " /* (15) stime %lu */
607 "%*d " /* (16) cutime %ld */
608 "%*d " /* (17) cstime %ld */
609 "%*d " /* (18) priority %ld */
610 "%*d " /* (19) nice %ld */
611 "%*d " /* (20) num_threads %ld */
612 "%*d " /* (21) itrealvalue %ld */
613 "%" PRIu64
, /* (22) starttime %llu */
616 return ret_set_errno(0, EINVAL
);
618 return ret_set_errno(starttime
, 0);
/*
 * get_reaper_start_time_in_sec - get_reaper_start_time() converted from clock
 * ticks to seconds by dividing by sysconf(_SC_CLK_TCK).
 *
 * Both failure paths return through log_debug(0, ...): when the start time is
 * 0 with errno == EINVAL, and when sysconf() is negative with
 * errno == EINVAL.
 *
 * NOTE(review): no reset of errno before sysconf() is visible, so the second
 * check may observe a stale errno - confirm. The final return of `res` is not
 * visible here.
 */
621 static double get_reaper_start_time_in_sec(pid_t pid
)
623 uint64_t clockticks
, ticks_per_sec
;
627 clockticks
= get_reaper_start_time(pid
);
628 if (clockticks
== 0 && errno
== EINVAL
)
629 return log_debug(0, "Failed to retrieve start time of pid %d", pid
);
631 ret
= sysconf(_SC_CLK_TCK
);
632 if (ret
< 0 && errno
== EINVAL
)
633 return log_debug(0, "Failed to determine number of clock ticks in a second");
635 ticks_per_sec
= (uint64_t)ret
;
636 res
= (double)clockticks
/ ticks_per_sec
;
/*
 * get_reaper_age - how long the init process belonging to pid has been
 * running, in seconds.
 *
 * Takes the start time in seconds from get_reaper_start_time_in_sec(), reads
 * CLOCK_BOOTTIME, converts it to milliseconds and computes
 * (uptime_ms - procstart * 1000) / 1000.
 *
 * NOTE(review): the conditions around the clock_gettime() call and the value
 * returned when it fails are not visible here.
 */
640 static double get_reaper_age(pid_t pid
)
643 double procstart
, procage
;
645 /* We need to subtract the time the process has started since system
646 * boot minus the time when the system has started to get the actual
649 procstart
= get_reaper_start_time_in_sec(pid
);
653 struct timespec spec
;
655 ret
= clock_gettime(CLOCK_BOOTTIME
, &spec
);
659 /* We could make this more precise here by using the tv_nsec
660 * field in the timespec struct and convert it to milliseconds
661 * and then create a double for the seconds and milliseconds but
662 * that seems more work than it is worth.
664 uptime_ms
= (spec
.tv_sec
* 1000) + (spec
.tv_nsec
* 1e-6);
665 procage
= (uptime_ms
- (procstart
* 1000)) / 1000;
672 * We read /proc/uptime and reuse its second field.
673 * For the first field, we use the mtime for the reaper for
674 * the calling pid as returned by get_reaper_age()
/*
 * proc_uptime_read - produce the container view of /proc/uptime.
 *
 * Visible flow:
 *  - busytime is taken from get_reaper_busy(fc->pid) when the locals are
 *    initialised;
 *  - a request whose offset lies inside the cached data is answered with a
 *    memcpy() from d->buf;
 *  - otherwise reaperage comes from get_reaper_age(fc->pid) and idletime is
 *    reaperage - busytime when reaperage >= busytime, else reaperage;
 *  - "%.2lf %.2lf\n" (reaperage, idletime) is formatted into d->buf; a
 *    negative result or one that does not fit in d->buflen returns through
 *    log_error(0, ...);
 *  - d->size is set to the formatted length and the text is copied to buf.
 *
 * NOTE(review): what happens when total_len > size and the return values are
 * not visible here.
 */
676 static int proc_uptime_read(char *buf
, size_t size
, off_t offset
,
677 struct fuse_file_info
*fi
)
679 struct fuse_context
*fc
= fuse_get_context();
680 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
681 double busytime
= get_reaper_busy(fc
->pid
);
682 char *cache
= d
->buf
;
683 ssize_t total_len
= 0;
684 double idletime
, reaperage
;
696 if (offset
> d
->size
)
699 left
= d
->size
- offset
;
700 total_len
= left
> size
? size
: left
;
701 memcpy(buf
, cache
+ offset
, total_len
);
706 reaperage
= get_reaper_age(fc
->pid
);
708 * To understand why this is done, please read the comment to the
709 * get_reaper_busy() function.
711 idletime
= reaperage
;
712 if (reaperage
>= busytime
)
713 idletime
= reaperage
- busytime
;
715 total_len
= snprintf(d
->buf
, d
->buflen
, "%.2lf %.2lf\n", reaperage
, idletime
);
716 if (total_len
< 0 || total_len
>= d
->buflen
)
717 return log_error(0, "Failed to write to cache");
719 d
->size
= (int)total_len
;
722 if (total_len
> size
)
725 memcpy(buf
, d
->buf
, total_len
);
729 #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
/*
 * proc_stat_read - produce the container view of /proc/stat.
 *
 * Visible flow:
 *  - a request whose offset lies inside the cached data is answered with a
 *    memcpy() from d->buf;
 *  - otherwise the "cpuset" cgroup of the caller's init pid is resolved
 *    (read_file_fuse("/proc/stat", ...) is the visible fallback), its cpuset
 *    is fetched and read_cpuacct_usage_all() is tried for per-CPU usage;
 *  - when cgroup_ops->can_use_cpuview() is true and usage data exists, the
 *    output is produced by cpuview_proc_stat();
 *  - otherwise the host /proc/stat is read line by line: lines that are not
 *    "cpuN" lines are copied as-is, CPUs outside the cpuset are filtered with
 *    cpu_in_cpuset(), and the remaining lines are renumbered with curcpu.
 *    Without usage data (or when the ten counters cannot be parsed) the host
 *    values are kept; with usage data user/system come from cg_cpu_usage and
 *    idle is the host idle plus the host time not used by the cgroup;
 *  - the per-CPU values are summed and the aggregate "cpu" line is formatted
 *    into cpuall, copied to the front of the buffer and followed by the
 *    per-CPU text, which was written CPUALL_MAX_SIZE bytes into d->buf.
 *
 * NOTE(review): the statements that advance cache/cache_size/total_len, the
 * handling of physcpu >= cg_cpu_usage_size and of all_used < cg_used, and the
 * return values are not visible here.
 */
730 static int proc_stat_read(char *buf
, size_t size
, off_t offset
,
731 struct fuse_file_info
*fi
)
733 __do_free
char *cg
= NULL
, *cpuset
= NULL
, *line
= NULL
;
734 __do_free
void *fopen_cache
= NULL
;
735 __do_free
struct cpuacct_usage
*cg_cpu_usage
= NULL
;
736 __do_fclose
FILE *f
= NULL
;
737 struct fuse_context
*fc
= fuse_get_context();
738 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
739 size_t linelen
= 0, total_len
= 0;
740 int curcpu
= -1; /* cpu numbering starts at 0 */
742 unsigned long user
= 0, nice
= 0, system
= 0, idle
= 0, iowait
= 0,
743 irq
= 0, softirq
= 0, steal
= 0, guest
= 0, guest_nice
= 0;
744 unsigned long user_sum
= 0, nice_sum
= 0, system_sum
= 0, idle_sum
= 0,
745 iowait_sum
= 0, irq_sum
= 0, softirq_sum
= 0,
746 steal_sum
= 0, guest_sum
= 0, guest_nice_sum
= 0;
747 char cpuall
[CPUALL_MAX_SIZE
];
748 /* reserve for cpu all */
749 char *cache
= d
->buf
+ CPUALL_MAX_SIZE
;
750 size_t cache_size
= d
->buflen
- CPUALL_MAX_SIZE
;
751 int cg_cpu_usage_size
= 0;
756 if (offset
> d
->size
)
762 left
= d
->size
- offset
;
763 total_len
= left
> size
? size
: left
;
764 memcpy(buf
, d
->buf
+ offset
, total_len
);
769 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
770 if (initpid
<= 1 || is_shared_pidns(initpid
))
774 * when container run with host pid namespace initpid == 1, cgroup will "/"
775 * we should return host os's /proc contents.
776 * in some case cpuacct_usage.all in "/" will larger then /proc/stat
779 return read_file_fuse("/proc/stat", buf
, size
, d
);
781 cg
= get_pid_cgroup(initpid
, "cpuset");
783 return read_file_fuse("/proc/stat", buf
, size
, d
);
784 prune_init_slice(cg
);
786 cpuset
= get_cpuset(cg
);
791 * Read cpuacct.usage_all for all CPUs.
792 * If the cpuacct cgroup is present, it is used to calculate the container's
793 * CPU usage. If not, values from the host's /proc/stat are used.
795 if (read_cpuacct_usage_all(cg
, cpuset
, &cg_cpu_usage
, &cg_cpu_usage_size
) != 0)
796 lxcfs_v("%s\n", "proc_stat_read failed to read from cpuacct, falling back to the host's /proc/stat");
798 f
= fopen_cached("/proc/stat", "re", &fopen_cache
);
803 if (getline(&line
, &linelen
, f
) < 0)
804 return log_error(0, "proc_stat_read read first line failed");
806 if (cgroup_ops
->can_use_cpuview(cgroup_ops
) && cg_cpu_usage
) {
807 total_len
= cpuview_proc_stat(cg
, cpuset
, cg_cpu_usage
, cg_cpu_usage_size
,
808 f
, d
->buf
, d
->buflen
);
812 while (getline(&line
, &linelen
, f
) != -1) {
814 char cpu_char
[10]; /* That's a lot of cores */
816 uint64_t all_used
, cg_used
, new_idle
;
819 if (strlen(line
) == 0)
821 if (sscanf(line
, "cpu%9[^ ]", cpu_char
) != 1) {
822 /* not a ^cpuN line containing a number N, just print it */
823 l
= snprintf(cache
, cache_size
, "%s", line
);
825 return log_error(0, "Failed to write cache");
827 return log_error(0, "Write to cache was truncated");
836 if (sscanf(cpu_char
, "%d", &physcpu
) != 1)
839 if (!cpu_in_cpuset(physcpu
, cpuset
))
844 ret
= sscanf(line
, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
855 if (ret
!= 10 || !cg_cpu_usage
) {
856 c
= strchr(line
, ' ');
860 l
= snprintf(cache
, cache_size
, "cpu%d%s", curcpu
, c
);
862 return log_error(0, "Failed to write cache");
864 return log_error(0, "Write to cache was truncated");
875 if (physcpu
>= cg_cpu_usage_size
)
878 all_used
= user
+ nice
+ system
+ iowait
+ irq
+ softirq
+ steal
+ guest
+ guest_nice
;
879 cg_used
= cg_cpu_usage
[physcpu
].user
+ cg_cpu_usage
[physcpu
].system
;
881 if (all_used
>= cg_used
) {
882 new_idle
= idle
+ (all_used
- cg_used
);
885 lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64
" in /proc/stat, %" PRIu64
" in cpuacct.usage_all; unable to determine idle time",
886 curcpu
, cg
, all_used
, cg_used
);
890 l
= snprintf(cache
, cache_size
,
891 "cpu%d %" PRIu64
" 0 %" PRIu64
" %" PRIu64
" 0 0 0 0 0 0\n",
892 curcpu
, cg_cpu_usage
[physcpu
].user
,
893 cg_cpu_usage
[physcpu
].system
, new_idle
);
895 return log_error(0, "Failed to write cache");
897 return log_error(0, "Write to cache was truncated");
903 user_sum
+= cg_cpu_usage
[physcpu
].user
;
904 system_sum
+= cg_cpu_usage
[physcpu
].system
;
905 idle_sum
+= new_idle
;
909 system_sum
+= system
;
911 iowait_sum
+= iowait
;
913 softirq_sum
+= softirq
;
916 guest_nice_sum
+= guest_nice
;
922 int cpuall_len
= snprintf(cpuall
, CPUALL_MAX_SIZE
, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
933 if (cpuall_len
> 0 && cpuall_len
< CPUALL_MAX_SIZE
) {
934 memcpy(cache
, cpuall
, cpuall_len
);
937 /* shouldn't happen */
938 lxcfs_error("proc_stat_read copy cpuall failed, cpuall_len=%d", cpuall_len
);
942 memmove(cache
, d
->buf
+ CPUALL_MAX_SIZE
, total_len
);
943 total_len
+= cpuall_len
;
948 if (total_len
> size
)
951 memcpy(buf
, d
->buf
, total_len
);
955 /* Note that "memory.stat" in cgroup2 is hierarchical by default. */
/*
 * cgroup_parse_memory_stat - fill *mstat from the memory statistics file of
 * cgroup.
 *
 * The file descriptor comes from cgroup_ops->get_memory_stats_fd() and is
 * read line by line. Each line is matched by prefix: on a pure unified
 * layout the cgroup2 key names are used ("file", "shmem", "file_mapped",
 * "pgfault", ...), otherwise the "total_*" / "hierarchical_*" names. Keys
 * guarded by !unified are only parsed on non-unified layouts. The value after
 * the key is stored in the matching struct memory_stat member; the sscanf()
 * results are not checked, so a member keeps its previous value when the
 * number cannot be parsed.
 *
 * NOTE(review): the branches form one else-if chain and startswith() is a
 * pure prefix test. "total_rss" is tested before "total_rss_huge", and on
 * unified layouts "file" is tested before "file_mapped", so lines with the
 * longer keys are taken by the earlier branch (whose sscanf() format then
 * does not match) and the later branches look unreachable. Confirm and
 * consider matching the key including its trailing space.
 * NOTE(review): the checks of fd and f and the return statements are not
 * visible here.
 */
956 static bool cgroup_parse_memory_stat(const char *cgroup
, struct memory_stat
*mstat
)
958 __do_close_prot_errno
int fd
= -EBADF
;
959 __do_fclose
FILE *f
= NULL
;
960 __do_free
char *line
= NULL
;
961 __do_free
void *fdopen_cache
= NULL
;
966 fd
= cgroup_ops
->get_memory_stats_fd(cgroup_ops
, cgroup
);
970 f
= fdopen_cached(fd
, "re", &fdopen_cache
);
974 unified
= pure_unified_layout(cgroup_ops
);
975 while ((linelen
= getline(&line
, &len
, f
)) != -1) {
976 if (!unified
&& startswith(line
, "hierarchical_memory_limit")) {
977 sscanf(line
, "hierarchical_memory_limit %" PRIu64
, &(mstat
->hierarchical_memory_limit
));
978 } else if (!unified
&& startswith(line
, "hierarchical_memsw_limit")) {
979 sscanf(line
, "hierarchical_memsw_limit %" PRIu64
, &(mstat
->hierarchical_memsw_limit
));
980 } else if (startswith(line
, unified
? "file" :"total_cache")) {
981 sscanf(line
, unified
? "file %" PRIu64
: "total_cache %" PRIu64
, &(mstat
->total_cache
));
982 } else if (!unified
&& startswith(line
, "total_rss")) {
983 sscanf(line
, "total_rss %" PRIu64
, &(mstat
->total_rss
));
984 } else if (!unified
&& startswith(line
, "total_rss_huge")) {
985 sscanf(line
, "total_rss_huge %" PRIu64
, &(mstat
->total_rss_huge
));
986 } else if (startswith(line
, unified
? "shmem" : "total_shmem")) {
987 sscanf(line
, unified
? "shmem %" PRIu64
: "total_shmem %" PRIu64
, &(mstat
->total_shmem
));
988 } else if (startswith(line
, unified
? "file_mapped" : "total_mapped_file")) {
989 sscanf(line
, unified
? "file_mapped %" PRIu64
: "total_mapped_file %" PRIu64
, &(mstat
->total_mapped_file
));
990 } else if (!unified
&& startswith(line
, "total_dirty")) {
991 sscanf(line
, "total_dirty %" PRIu64
, &(mstat
->total_dirty
));
992 } else if (!unified
&& startswith(line
, "total_writeback")) {
993 sscanf(line
, "total_writeback %" PRIu64
, &(mstat
->total_writeback
));
994 } else if (!unified
&& startswith(line
, "total_swap")) {
995 sscanf(line
, "total_swap %" PRIu64
, &(mstat
->total_swap
));
996 } else if (!unified
&& startswith(line
, "total_pgpgin")) {
997 sscanf(line
, "total_pgpgin %" PRIu64
, &(mstat
->total_pgpgin
));
998 } else if (!unified
&& startswith(line
, "total_pgpgout")) {
999 sscanf(line
, "total_pgpgout %" PRIu64
, &(mstat
->total_pgpgout
));
1000 } else if (startswith(line
, unified
? "pgfault" : "total_pgfault")) {
1001 sscanf(line
, unified
? "pgfault %" PRIu64
: "total_pgfault %" PRIu64
, &(mstat
->total_pgfault
));
1002 } else if (startswith(line
, unified
? "pgmajfault" : "total_pgmajfault")) {
1003 sscanf(line
, unified
? "pgmajfault %" PRIu64
: "total_pgmajfault %" PRIu64
, &(mstat
->total_pgmajfault
));
1004 } else if (startswith(line
, unified
? "inactive_anon" : "total_inactive_anon")) {
1005 sscanf(line
, unified
? "inactive_anon %" PRIu64
: "total_inactive_anon %" PRIu64
, &(mstat
->total_inactive_anon
));
1006 } else if (startswith(line
, unified
? "active_anon" : "total_active_anon")) {
1007 sscanf(line
, unified
? "active_anon %" PRIu64
: "total_active_anon %" PRIu64
, &(mstat
->total_active_anon
));
1008 } else if (startswith(line
, unified
? "inactive_file" : "total_inactive_file")) {
1009 sscanf(line
, unified
? "inactive_file %" PRIu64
: "total_inactive_file %" PRIu64
, &(mstat
->total_inactive_file
));
1010 } else if (startswith(line
, unified
? "active_file" : "total_active_file")) {
1011 sscanf(line
, unified
? "active_file %" PRIu64
: "total_active_file %" PRIu64
, &(mstat
->total_active_file
));
1012 } else if (startswith(line
, unified
? "unevictable" : "total_unevictable")) {
1013 sscanf(line
, unified
? "unevictable %" PRIu64
: "total_unevictable %" PRIu64
, &(mstat
->total_unevictable
));
/*
 * proc_meminfo_read - produce the container view of /proc/meminfo.
 *
 * Visible flow:
 *  - a request whose offset lies inside the cached data is answered with a
 *    memcpy() from d->buf;
 *  - otherwise the "memory" cgroup of the caller's init pid is resolved
 *    (read_file_fuse("/proc/meminfo", ...) is the visible fallback), the
 *    memory limit and usage are read, cgroup_parse_memory_stat() fills mstat
 *    and the memory+swap limit and usage are read on a best-effort basis
 *    (swap accounting may be disabled);
 *  - the host /proc/meminfo is then read line by line and the lines listed
 *    in the else-if chain are rewritten into lbuf: MemTotal is the smaller
 *    of the host total and the cgroup limit, MemFree and MemAvailable derive
 *    from memlimit - memusage, the Swap* lines honour opts->swap_off, the
 *    page-cache and anon lines come from mstat (divided by 1024), and Slab,
 *    Buffers, SwapCached, SReclaimable, SUnreclaim, ShmemHugePages and
 *    ShmemPmdMapped are reported as 0;
 *  - each resulting line is appended to the cache, d->size is set to the
 *    total length and the text is copied to buf.
 *
 * NOTE(review): the SwapTotal branch subtracts memlimit from memswlimit in
 * place, and the SwapFree branch later uses memswlimit as the swap total, so
 * the result depends on SwapTotal preceding SwapFree in the host file.
 * NOTE(review): memlimit - memusage and memswlimit - memlimit are unsigned
 * subtractions; no guard against a larger second operand is visible.
 * NOTE(review): the unit conversions of memlimit/memusage, the selection of
 * `printme` and the return values are not visible here.
 */
1020 static int proc_meminfo_read(char *buf
, size_t size
, off_t offset
,
1021 struct fuse_file_info
*fi
)
1023 __do_free
char *cgroup
= NULL
, *line
= NULL
,
1024 *memusage_str
= NULL
, *memstat_str
= NULL
,
1025 *memswlimit_str
= NULL
, *memswusage_str
= NULL
;
1026 __do_free
void *fopen_cache
= NULL
;
1027 __do_fclose
FILE *f
= NULL
;
1028 struct fuse_context
*fc
= fuse_get_context();
1029 struct lxcfs_opts
*opts
= (struct lxcfs_opts
*)fuse_get_context()->private_data
;
1030 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
1031 uint64_t memlimit
= 0, memusage
= 0, memswlimit
= 0, memswusage
= 0,
1033 struct memory_stat mstat
= {};
1034 size_t linelen
= 0, total_len
= 0;
1035 char *cache
= d
->buf
;
1036 size_t cache_size
= d
->buflen
;
1042 if (offset
> d
->size
)
1048 left
= d
->size
- offset
;
1049 total_len
= left
> size
? size
: left
;
1050 memcpy(buf
, cache
+ offset
, total_len
);
1055 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
1056 if (initpid
<= 1 || is_shared_pidns(initpid
))
1059 cgroup
= get_pid_cgroup(initpid
, "memory");
1061 return read_file_fuse("/proc/meminfo", buf
, size
, d
);
1063 prune_init_slice(cgroup
);
1065 memlimit
= get_min_memlimit(cgroup
, false);
1067 ret
= cgroup_ops
->get_memory_current(cgroup_ops
, cgroup
, &memusage_str
);
1071 if (!cgroup_parse_memory_stat(cgroup
, &mstat
))
1075 * Following values are allowed to fail, because swapaccount might be
1076 * turned off for current kernel.
1078 ret
= cgroup_ops
->get_memory_swap_max(cgroup_ops
, cgroup
, &memswlimit_str
);
1080 ret
= cgroup_ops
->get_memory_swap_current(cgroup_ops
, cgroup
, &memswusage_str
);
1082 memswlimit
= get_min_memlimit(cgroup
, true);
1083 memswusage
= strtoul(memswusage_str
, NULL
, 10);
1084 memswlimit
= memswlimit
/ 1024;
1085 memswusage
= memswusage
/ 1024;
1088 memusage
= strtoul(memusage_str
, NULL
, 10);
1092 f
= fopen_cached("/proc/meminfo", "re", &fopen_cache
);
1096 while (getline(&line
, &linelen
, f
) != -1) {
1098 char *printme
, lbuf
[100];
1100 memset(lbuf
, 0, 100);
1101 if (startswith(line
, "MemTotal:")) {
1102 sscanf(line
+sizeof("MemTotal:")-1, "%" PRIu64
, &hosttotal
);
1103 if (hosttotal
< memlimit
)
1104 memlimit
= hosttotal
;
1105 snprintf(lbuf
, 100, "MemTotal: %8" PRIu64
" kB\n", memlimit
);
1107 } else if (startswith(line
, "MemFree:")) {
1108 snprintf(lbuf
, 100, "MemFree: %8" PRIu64
" kB\n", memlimit
- memusage
);
1110 } else if (startswith(line
, "MemAvailable:")) {
1111 snprintf(lbuf
, 100, "MemAvailable: %8" PRIu64
" kB\n", memlimit
- memusage
+ mstat
.total_cache
/ 1024);
1113 } else if (startswith(line
, "SwapTotal:") && memswlimit
> 0 &&
1114 opts
&& opts
->swap_off
== false) {
1115 memswlimit
-= memlimit
;
1116 snprintf(lbuf
, 100, "SwapTotal: %8" PRIu64
" kB\n", memswlimit
);
1118 } else if (startswith(line
, "SwapTotal:") && opts
&& opts
->swap_off
== true) {
1119 snprintf(lbuf
, 100, "SwapTotal: %8" PRIu64
" kB\n", (uint64_t)0);
1121 } else if (startswith(line
, "SwapFree:") && memswlimit
> 0 &&
1122 memswusage
> 0 && opts
&& opts
->swap_off
== false) {
1123 uint64_t swaptotal
= memswlimit
,
1124 swapusage
= memusage
> memswusage
1126 : memswusage
- memusage
,
1127 swapfree
= swapusage
< swaptotal
1128 ? swaptotal
- swapusage
1130 snprintf(lbuf
, 100, "SwapFree: %8" PRIu64
" kB\n", swapfree
);
1132 } else if (startswith(line
, "SwapFree:") && opts
&& opts
->swap_off
== true) {
1133 snprintf(lbuf
, 100, "SwapFree: %8" PRIu64
" kB\n", (uint64_t)0);
1135 } else if (startswith(line
, "Slab:")) {
1136 snprintf(lbuf
, 100, "Slab: %8" PRIu64
" kB\n", (uint64_t)0);
1138 } else if (startswith(line
, "Buffers:")) {
1139 snprintf(lbuf
, 100, "Buffers: %8" PRIu64
" kB\n", (uint64_t)0);
1141 } else if (startswith(line
, "Cached:")) {
1142 snprintf(lbuf
, 100, "Cached: %8" PRIu64
" kB\n",
1143 mstat
.total_cache
/ 1024);
1145 } else if (startswith(line
, "SwapCached:")) {
1146 snprintf(lbuf
, 100, "SwapCached: %8" PRIu64
" kB\n", (uint64_t)0);
1148 } else if (startswith(line
, "Active:")) {
1149 snprintf(lbuf
, 100, "Active: %8" PRIu64
" kB\n",
1150 (mstat
.total_active_anon
+
1151 mstat
.total_active_file
) /
1154 } else if (startswith(line
, "Inactive:")) {
1155 snprintf(lbuf
, 100, "Inactive: %8" PRIu64
" kB\n",
1156 (mstat
.total_inactive_anon
+
1157 mstat
.total_inactive_file
) /
1160 } else if (startswith(line
, "Active(anon)")) {
1161 snprintf(lbuf
, 100, "Active(anon): %8" PRIu64
" kB\n",
1162 mstat
.total_active_anon
/ 1024);
1164 } else if (startswith(line
, "Inactive(anon)")) {
1165 snprintf(lbuf
, 100, "Inactive(anon): %8" PRIu64
" kB\n",
1166 mstat
.total_inactive_anon
/ 1024);
1168 } else if (startswith(line
, "Active(file)")) {
1169 snprintf(lbuf
, 100, "Active(file): %8" PRIu64
" kB\n",
1170 mstat
.total_active_file
/ 1024);
1172 } else if (startswith(line
, "Inactive(file)")) {
1173 snprintf(lbuf
, 100, "Inactive(file): %8" PRIu64
" kB\n",
1174 mstat
.total_inactive_file
/ 1024);
1176 } else if (startswith(line
, "Unevictable")) {
1177 snprintf(lbuf
, 100, "Unevictable: %8" PRIu64
" kB\n",
1178 mstat
.total_unevictable
/ 1024);
1180 } else if (startswith(line
, "Dirty")) {
1181 snprintf(lbuf
, 100, "Dirty: %8" PRIu64
" kB\n",
1182 mstat
.total_dirty
/ 1024);
1184 } else if (startswith(line
, "Writeback")) {
1185 snprintf(lbuf
, 100, "Writeback: %8" PRIu64
" kB\n",
1186 mstat
.total_writeback
/ 1024);
1188 } else if (startswith(line
, "AnonPages")) {
1189 snprintf(lbuf
, 100, "AnonPages: %8" PRIu64
" kB\n",
1190 (mstat
.total_active_anon
+
1191 mstat
.total_inactive_anon
- mstat
.total_shmem
) /
1194 } else if (startswith(line
, "Mapped")) {
1195 snprintf(lbuf
, 100, "Mapped: %8" PRIu64
" kB\n",
1196 mstat
.total_mapped_file
/ 1024);
1198 } else if (startswith(line
, "SReclaimable")) {
1199 snprintf(lbuf
, 100, "SReclaimable: %8" PRIu64
" kB\n", (uint64_t)0);
1201 } else if (startswith(line
, "SUnreclaim")) {
1202 snprintf(lbuf
, 100, "SUnreclaim: %8" PRIu64
" kB\n", (uint64_t)0);
1204 } else if (startswith(line
, "Shmem:")) {
1205 snprintf(lbuf
, 100, "Shmem: %8" PRIu64
" kB\n",
1206 mstat
.total_shmem
/ 1024);
1208 } else if (startswith(line
, "ShmemHugePages")) {
1209 snprintf(lbuf
, 100, "ShmemHugePages: %8" PRIu64
" kB\n", (uint64_t)0);
1211 } else if (startswith(line
, "ShmemPmdMapped")) {
1212 snprintf(lbuf
, 100, "ShmemPmdMapped: %8" PRIu64
" kB\n", (uint64_t)0);
1214 } else if (startswith(line
, "AnonHugePages")) {
1215 snprintf(lbuf
, 100, "AnonHugePages: %8" PRIu64
" kB\n",
1216 mstat
.total_rss_huge
/ 1024);
1222 l
= snprintf(cache
, cache_size
, "%s", printme
);
1224 return log_error(0, "Failed to write cache");
1225 if (l
>= cache_size
)
1226 return log_error(0, "Write to cache was truncated");
1234 d
->size
= total_len
;
1235 if (total_len
> size
)
1237 memcpy(buf
, d
->buf
, total_len
);
/*
 * proc_read - read entry point for the emulated /proc files.
 *
 * Recovers the struct file_info stored in fi->fh and dispatches on the
 * LXC_TYPE_PROC_* case labels to the matching reader (meminfo, cpuinfo,
 * uptime, stat, diskstats, swaps, loadavg), forwarding buf, size, offset and
 * fi unchanged and returning that reader's result.
 *
 * NOTE(review): the switch expression (presumably the type recorded in the
 * file_info) and the value returned for an unknown type are not visible
 * here; path is not referenced by the visible statements.
 */
1242 int proc_read(const char *path
, char *buf
, size_t size
, off_t offset
,
1243 struct fuse_file_info
*fi
)
1245 struct file_info
*f
= INTTYPE_TO_PTR(fi
->fh
);
1248 case LXC_TYPE_PROC_MEMINFO
:
1249 return proc_meminfo_read(buf
, size
, offset
, fi
);
1250 case LXC_TYPE_PROC_CPUINFO
:
1251 return proc_cpuinfo_read(buf
, size
, offset
, fi
);
1252 case LXC_TYPE_PROC_UPTIME
:
1253 return proc_uptime_read(buf
, size
, offset
, fi
);
1254 case LXC_TYPE_PROC_STAT
:
1255 return proc_stat_read(buf
, size
, offset
, fi
);
1256 case LXC_TYPE_PROC_DISKSTATS
:
1257 return proc_diskstats_read(buf
, size
, offset
, fi
);
1258 case LXC_TYPE_PROC_SWAPS
:
1259 return proc_swaps_read(buf
, size
, offset
, fi
);
1260 case LXC_TYPE_PROC_LOADAVG
:
1261 return proc_loadavg_read(buf
, size
, offset
, fi
);