]> git.proxmox.com Git - mirror_lxcfs.git/blame - src/proc_fuse.c
cgroups: Add get_memory_swappiness
[mirror_lxcfs.git] / src / proc_fuse.c
CommitLineData
db0463bf 1/* SPDX-License-Identifier: LGPL-2.1+ */
1f5596dd
CB
2
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6
f834b6bf
SP
7#include "config.h"
8
9#ifdef HAVE_FUSE3
10#ifndef FUSE_USE_VERSION
11#define FUSE_USE_VERSION 30
12#endif
13#else
1f5596dd
CB
14#ifndef FUSE_USE_VERSION
15#define FUSE_USE_VERSION 26
16#endif
f834b6bf 17#endif
1f5596dd
CB
18
19#define _FILE_OFFSET_BITS 64
20
21#define __STDC_FORMAT_MACROS
22#include <dirent.h>
23#include <errno.h>
24#include <fcntl.h>
25#include <fuse.h>
26#include <inttypes.h>
27#include <libgen.h>
28#include <pthread.h>
29#include <sched.h>
30#include <stdarg.h>
31#include <stdbool.h>
32#include <stdint.h>
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#include <time.h>
37#include <unistd.h>
38#include <wait.h>
39#include <linux/magic.h>
40#include <linux/sched.h>
41#include <sys/epoll.h>
42#include <sys/mman.h>
43#include <sys/mount.h>
44#include <sys/param.h>
45#include <sys/socket.h>
46#include <sys/syscall.h>
47#include <sys/sysinfo.h>
48#include <sys/vfs.h>
49
50#include "bindings.h"
1f5596dd
CB
51#include "cgroup_fuse.h"
52#include "cgroups/cgroup.h"
53#include "cgroups/cgroup_utils.h"
4ec5c9da 54#include "cpuset_parse.h"
ec2043ed 55#include "lxcfs_fuse_compat.h"
1f5596dd
CB
56#include "memory_utils.h"
57#include "proc_loadavg.h"
4ec5c9da 58#include "proc_cpuview.h"
1f5596dd
CB
59#include "utils.h"
60
/*
 * Set of 64-bit memory counters kept together for one cgroup.
 *
 * NOTE(review): the member names match keys found in the cgroup v1
 * "memory.stat" file; presumably each member holds the value of the key
 * with the same name - confirm against the code that fills this struct.
 */
struct memory_stat {
	/* limits */
	uint64_t hierarchical_memory_limit;
	uint64_t hierarchical_memsw_limit;

	/* usage broken down by kind */
	uint64_t total_cache;
	uint64_t total_rss;
	uint64_t total_rss_huge;
	uint64_t total_shmem;
	uint64_t total_mapped_file;
	uint64_t total_dirty;
	uint64_t total_writeback;
	uint64_t total_swap;

	/* paging event counters */
	uint64_t total_pgpgin;
	uint64_t total_pgpgout;
	uint64_t total_pgfault;
	uint64_t total_pgmajfault;

	/* LRU lists */
	uint64_t total_inactive_anon;
	uint64_t total_active_anon;
	uint64_t total_inactive_file;
	uint64_t total_active_file;
	uint64_t total_unevictable;
};
82
2d7bcab7 83__lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb)
1f5596dd
CB
84{
85 struct timespec now;
86
87 memset(sb, 0, sizeof(struct stat));
88 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
89 return -EINVAL;
f75d5b75 90
1f5596dd
CB
91 sb->st_uid = sb->st_gid = 0;
92 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
93 if (strcmp(path, "/proc") == 0) {
94 sb->st_mode = S_IFDIR | 00555;
95 sb->st_nlink = 2;
96 return 0;
97 }
f75d5b75
CB
98
99 if (strcmp(path, "/proc/meminfo") == 0 ||
100 strcmp(path, "/proc/cpuinfo") == 0 ||
101 strcmp(path, "/proc/uptime") == 0 ||
102 strcmp(path, "/proc/stat") == 0 ||
103 strcmp(path, "/proc/diskstats") == 0 ||
104 strcmp(path, "/proc/swaps") == 0 ||
105 strcmp(path, "/proc/loadavg") == 0) {
25982f5d 106 sb->st_size = 4096;
1f5596dd
CB
107 sb->st_mode = S_IFREG | 00444;
108 sb->st_nlink = 1;
109 return 0;
110 }
111
112 return -ENOENT;
113}
114
2d7bcab7
CB
115__lxcfs_fuse_ops int proc_readdir(const char *path, void *buf,
116 fuse_fill_dir_t filler, off_t offset,
117 struct fuse_file_info *fi)
1f5596dd 118{
f834b6bf
SP
119 if (DIR_FILLER(filler, buf, ".", NULL, 0) != 0 ||
120 DIR_FILLER(filler, buf, "..", NULL, 0) != 0 ||
121 DIR_FILLER(filler, buf, "cpuinfo", NULL, 0) != 0 ||
122 DIR_FILLER(filler, buf, "meminfo", NULL, 0) != 0 ||
123 DIR_FILLER(filler, buf, "stat", NULL, 0) != 0 ||
124 DIR_FILLER(filler, buf, "uptime", NULL, 0) != 0 ||
125 DIR_FILLER(filler, buf, "diskstats", NULL, 0) != 0 ||
126 DIR_FILLER(filler, buf, "swaps", NULL, 0) != 0 ||
127 DIR_FILLER(filler, buf, "loadavg", NULL, 0) != 0)
1f5596dd
CB
128 return -EINVAL;
129
130 return 0;
131}
132
f75d5b75 133static off_t get_procfile_size(const char *path)
1f5596dd 134{
f75d5b75
CB
135 __do_fclose FILE *f = NULL;
136 __do_free char *line = NULL;
1f5596dd
CB
137 size_t len = 0;
138 ssize_t sz, answer = 0;
f75d5b75
CB
139
140 f = fopen(path, "re");
1f5596dd
CB
141 if (!f)
142 return 0;
143
144 while ((sz = getline(&line, &len, f)) != -1)
145 answer += sz;
1f5596dd
CB
146
147 return answer;
148}
149
2d7bcab7 150__lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi)
1f5596dd 151{
700dd417 152 __do_free struct file_info *info = NULL;
1f5596dd 153 int type = -1;
1f5596dd
CB
154
155 if (strcmp(path, "/proc/meminfo") == 0)
156 type = LXC_TYPE_PROC_MEMINFO;
157 else if (strcmp(path, "/proc/cpuinfo") == 0)
158 type = LXC_TYPE_PROC_CPUINFO;
159 else if (strcmp(path, "/proc/uptime") == 0)
160 type = LXC_TYPE_PROC_UPTIME;
161 else if (strcmp(path, "/proc/stat") == 0)
162 type = LXC_TYPE_PROC_STAT;
163 else if (strcmp(path, "/proc/diskstats") == 0)
164 type = LXC_TYPE_PROC_DISKSTATS;
165 else if (strcmp(path, "/proc/swaps") == 0)
166 type = LXC_TYPE_PROC_SWAPS;
167 else if (strcmp(path, "/proc/loadavg") == 0)
168 type = LXC_TYPE_PROC_LOADAVG;
169 if (type == -1)
170 return -ENOENT;
171
f1a33645 172 info = zalloc(sizeof(*info));
1f5596dd
CB
173 if (!info)
174 return -ENOMEM;
175
1f5596dd
CB
176 info->type = type;
177
178 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
179
f1a33645 180 info->buf = zalloc(info->buflen);
1f5596dd
CB
181 if (!info->buf)
182 return -ENOMEM;
1f5596dd
CB
183 /* set actual size to buffer size */
184 info->size = info->buflen;
185
700dd417 186 fi->fh = PTR_TO_UINT64(move_ptr(info));
1f5596dd
CB
187 return 0;
188}
189
2d7bcab7 190__lxcfs_fuse_ops int proc_access(const char *path, int mask)
1f5596dd
CB
191{
192 if (strcmp(path, "/proc") == 0 && access(path, R_OK) == 0)
193 return 0;
194
195 /* these are all read-only */
196 if ((mask & ~R_OK) != 0)
197 return -EACCES;
f75d5b75 198
1f5596dd
CB
199 return 0;
200}
201
2d7bcab7 202__lxcfs_fuse_ops int proc_release(const char *path, struct fuse_file_info *fi)
1f5596dd
CB
203{
204 do_release_file_info(fi);
205 return 0;
206}
207
e9712933 208static uint64_t get_memlimit(const char *cgroup, bool swap)
1f5596dd 209{
1f5596dd 210 __do_free char *memlimit_str = NULL;
c83158f2 211 uint64_t memlimit = 0;
f75d5b75 212 int ret;
1f5596dd
CB
213
214 if (swap)
215 ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cgroup, &memlimit_str);
216 else
217 ret = cgroup_ops->get_memory_max(cgroup_ops, cgroup, &memlimit_str);
ee1a885f 218 if (ret > 0 && memlimit_str[0] && safe_uint64(memlimit_str, &memlimit, 10) < 0)
c83158f2 219 lxcfs_error("Failed to convert memlimit %s", memlimit_str);
1f5596dd
CB
220
221 return memlimit;
222}
223
/* Step @pos backwards over every '/' that directly precedes it in @path. */
static char *rewind_over_slashes(char *path, char *pos)
{
	while (pos != path && pos[-1] == '/')
		pos--;

	return pos;
}

/*
 * This function is taken from glibc-2.32, as POSIX dirname("/some-dir") will
 * return "/some-dir" as opposed to "/", which breaks `get_min_memlimit()`.
 *
 * @path is truncated in place and returned. When @path is NULL or has no
 * directory part, a pointer to a static "." is returned instead and @path is
 * left untouched.
 */
static char *gnu_dirname(char *path)
{
	static const char dot[] = ".";
	char *cut, *head;

	cut = path ? strrchr(path, '/') : NULL;

	/* The last '/' ends the string: look for the one before that run. */
	if (cut && cut != path && cut[1] == '\0') {
		head = rewind_over_slashes(path, cut);
		if (head != path)
			cut = memrchr(path, '/', head - path);
	}

	/*
	 * The XPG specs require "." when no directory part is found, hence the
	 * static constant string.
	 */
	if (!cut)
		return (char *)dot;

	head = rewind_over_slashes(path, cut);
	if (head != path) {
		/* Cut in front of the whole run of slashes. */
		cut = head;
	} else if (cut == path + 1) {
		/*
		 * Exactly two slashes at the beginning of the string: "//" has
		 * to be kept. See XBD 4.10 Path Name Resolution.
		 */
		cut++;
	} else {
		/* Only slashes in front of the last component: result is "/". */
		cut = path + 1;
	}

	*cut = '\0';

	return path;
}
285
e9712933 286static uint64_t get_min_memlimit(const char *cgroup, bool swap)
1f5596dd
CB
287{
288 __do_free char *copy = NULL;
c83158f2 289 uint64_t memlimit = 0, retlimit = 0;
1f5596dd
CB
290
291 copy = strdup(cgroup);
f75d5b75
CB
292 if (!copy)
293 return log_error_errno(0, ENOMEM, "Failed to allocate memory");
294
1f5596dd
CB
295 retlimit = get_memlimit(copy, swap);
296
6f88ab0c
JS
297 /*
298 * If the cgroup doesn't start with / (probably won't happen), dirname()
299 * will terminate with "" instead of "/"
300 */
301 while (*copy && strcmp(copy, "/") != 0) {
1f5596dd
CB
302 char *it = copy;
303
6f88ab0c 304 it = gnu_dirname(it);
1f5596dd 305 memlimit = get_memlimit(it, swap);
c83158f2 306 if (memlimit > 0 && memlimit < retlimit)
1f5596dd
CB
307 retlimit = memlimit;
308 };
309
310 return retlimit;
311}
312
/* Return true when @line begins with @pref (an empty @pref always matches). */
static inline bool startswith(const char *line, const char *pref)
{
	const size_t preflen = strlen(pref);

	return !strncmp(line, pref, preflen);
}
317
1f5596dd
CB
318static int proc_swaps_read(char *buf, size_t size, off_t offset,
319 struct fuse_file_info *fi)
320{
671febde 321 __do_free char *cgroup = NULL, *memusage_str = NULL, *memswusage_str = NULL;
1f5596dd 322 struct fuse_context *fc = fuse_get_context();
2f2080c1 323 struct lxcfs_opts *opts = (struct lxcfs_opts *)fuse_get_context()->private_data;
c6805016 324 bool wants_swap = opts && !opts->swap_off && liblxcfs_can_use_swap();
99b183fb 325 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
e9712933 326 uint64_t memswlimit = 0, memlimit = 0, memusage = 0, memswusage = 0,
b7b018d0 327 swtotal = 0, swfree = 0, swusage = 0;
1f5596dd
CB
328 ssize_t total_len = 0;
329 ssize_t l = 0;
330 char *cache = d->buf;
331 int ret;
332
333 if (offset) {
334 int left;
335
336 if (offset > d->size)
337 return -EINVAL;
338
339 if (!d->cached)
340 return 0;
341
342 left = d->size - offset;
343 total_len = left > size ? size: left;
344 memcpy(buf, cache + offset, total_len);
345
346 return total_len;
347 }
348
349 pid_t initpid = lookup_initpid_in_store(fc->pid);
350 if (initpid <= 1 || is_shared_pidns(initpid))
351 initpid = fc->pid;
a9f0d623 352
b7b018d0
CB
353 cgroup = get_pid_cgroup(initpid, "memory");
354 if (!cgroup)
1f5596dd 355 return read_file_fuse("/proc/swaps", buf, size, d);
b7b018d0 356 prune_init_slice(cgroup);
1f5596dd 357
b7b018d0 358 memlimit = get_min_memlimit(cgroup, false);
1f5596dd 359
b7b018d0 360 ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str);
1f5596dd
CB
361 if (ret < 0)
362 return 0;
363
c83158f2
CB
364 if (safe_uint64(memusage_str, &memusage, 10) < 0)
365 lxcfs_error("Failed to convert memusage %s", memusage_str);
1f5596dd 366
2f2080c1 367 if (wants_swap) {
b7b018d0
CB
368 memswlimit = get_min_memlimit(cgroup, true);
369 if (memswlimit > 0) {
370 ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str);
371 if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) {
372 if (memlimit > memswlimit)
373 swtotal = 0;
374 else
375 swtotal = (memswlimit - memlimit) / 1024;
376 if (memusage > memswusage || swtotal == 0)
377 swusage = 0;
378 else
379 swusage = (memswusage - memusage) / 1024;
380 if (swtotal >= swusage)
381 swfree = swtotal - swusage;
382 }
2f2080c1 383 }
1f5596dd
CB
384 }
385
386 total_len = snprintf(d->buf, d->size, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
387
388 /* When no mem + swap limit is specified or swapaccount=0*/
389 if (!memswlimit) {
390 __do_free char *line = NULL;
757a63e7 391 __do_free void *fopen_cache = NULL;
1f5596dd
CB
392 __do_fclose FILE *f = NULL;
393 size_t linelen = 0;
394
757a63e7 395 f = fopen_cached("/proc/meminfo", "re", &fopen_cache);
1f5596dd
CB
396 if (!f)
397 return 0;
398
399 while (getline(&line, &linelen, f) != -1) {
400 if (startswith(line, "SwapTotal:"))
b7b018d0 401 sscanf(line, "SwapTotal: %8" PRIu64 " kB", &swtotal);
1f5596dd 402 else if (startswith(line, "SwapFree:"))
b7b018d0 403 sscanf(line, "SwapFree: %8" PRIu64 " kB", &swfree);
1f5596dd
CB
404 }
405 }
406
b7b018d0 407 if (swtotal > 0) {
1f5596dd 408 l = snprintf(d->buf + total_len, d->size - total_len,
e9712933 409 "none%*svirtual\t\t%" PRIu64 "\t%" PRIu64 "\t0\n",
b7b018d0 410 36, " ", swtotal, swfree);
1f5596dd
CB
411 total_len += l;
412 }
413
f75d5b75
CB
414 if (total_len < 0 || l < 0)
415 return log_error(0, "Failed writing to cache");
1f5596dd
CB
416
417 d->cached = 1;
418 d->size = (int)total_len;
419
f75d5b75
CB
420 if (total_len > size)
421 total_len = size;
1f5596dd 422 memcpy(buf, d->buf, total_len);
f75d5b75 423
1f5596dd
CB
424 return total_len;
425}
426
/*
 * Find the counter for device @major:@minor and operation @iotype in @str
 * and store it in @v.
 *
 * @str holds newline-separated records; the one wanted starts with
 * "<major>:<minor> <iotype>" and the number is read from what follows.
 *
 * @v is set to 0 when @str is NULL, when no record matches or when the
 * number cannot be parsed.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor,
			       char *iotype, uint64_t *v)
{
	char key[32];
	size_t len;
	int ret;

	*v = 0;

	/* The caller may have no data at all for this counter. */
	if (!str)
		return;

	ret = snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	/* A truncated key could match the wrong record. */
	if (ret < 0 || (size_t)ret >= sizeof(key))
		return;
	len = (size_t)ret;

	while (*str) {
		char *eol;

		if (strncmp(str, key, len) == 0) {
			if (sscanf(str + len, "%" SCNu64, v) != 1)
				*v = 0;
			return;
		}

		eol = strchr(str, '\n');
		if (!eol)
			return;
		str = eol + 1;
	}
}
450
/*
 * One record of a diskstats line. The number in each comment is the
 * position of the field on the line.
 */
struct lxcfs_diskstats {
	/* device identity */
	unsigned int major;		/*  1 - major number */
	unsigned int minor;		/*  2 - minor number */
	char dev_name[72];		/*  3 - device name */

	/* reads */
	uint64_t read;			/*  4 - reads completed successfully */
	uint64_t read_merged;		/*  5 - reads merged */
	uint64_t read_sectors;		/*  6 - sectors read */
	uint64_t read_ticks;		/*  7 - time spent reading (ms) */

	/* writes */
	uint64_t write;			/*  8 - writes completed */
	uint64_t write_merged;		/*  9 - writes merged */
	uint64_t write_sectors;		/* 10 - sectors written */
	uint64_t write_ticks;		/* 11 - time spent writing (ms) */

	/* queue */
	uint64_t ios_pgr;		/* 12 - I/Os currently in progress */
	uint64_t total_ticks;		/* 13 - time spent doing I/Os (ms) */
	uint64_t rq_ticks;		/* 14 - weighted time spent doing I/Os (ms) */

	/* discards (4.18+) */
	uint64_t discard;		/* 15 - discards completed successfully */
	uint64_t discard_merged;	/* 16 - discards merged */
	uint64_t discard_sectors;	/* 17 - sectors discarded */
	uint64_t discard_ticks;		/* 18 - time spent discarding */
};
471
1f5596dd
CB
472static int proc_diskstats_read(char *buf, size_t size, off_t offset,
473 struct fuse_file_info *fi)
474{
475 __do_free char *cg = NULL, *io_serviced_str = NULL,
476 *io_merged_str = NULL, *io_service_bytes_str = NULL,
477 *io_wait_time_str = NULL, *io_service_time_str = NULL,
478 *line = NULL;
757a63e7 479 __do_free void *fopen_cache = NULL;
1f5596dd
CB
480 __do_fclose FILE *f = NULL;
481 struct fuse_context *fc = fuse_get_context();
99b183fb 482 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
998cdfc9
CB
483 struct lxcfs_diskstats stats = {};
484 /* helper fields */
485 uint64_t read_service_time, write_service_time, discard_service_time, read_wait_time,
486 write_wait_time, discard_wait_time;
1f5596dd
CB
487 char *cache = d->buf;
488 size_t cache_size = d->buflen;
489 size_t linelen = 0, total_len = 0;
1f5596dd
CB
490 int i = 0;
491 int ret;
1f5596dd 492
cbfc55fd 493 if (offset) {
1f5596dd
CB
494 int left;
495
496 if (offset > d->size)
497 return -EINVAL;
498
499 if (!d->cached)
500 return 0;
501
502 left = d->size - offset;
503 total_len = left > size ? size: left;
504 memcpy(buf, cache + offset, total_len);
505
506 return total_len;
507 }
508
509 pid_t initpid = lookup_initpid_in_store(fc->pid);
510 if (initpid <= 1 || is_shared_pidns(initpid))
511 initpid = fc->pid;
a9f0d623 512
1f5596dd
CB
513 cg = get_pid_cgroup(initpid, "blkio");
514 if (!cg)
515 return read_file_fuse("/proc/diskstats", buf, size, d);
516 prune_init_slice(cg);
517
518 ret = cgroup_ops->get_io_serviced(cgroup_ops, cg, &io_serviced_str);
519 if (ret < 0) {
520 if (ret == -EOPNOTSUPP)
521 return read_file_fuse("/proc/diskstats", buf, size, d);
522 }
523
524 ret = cgroup_ops->get_io_merged(cgroup_ops, cg, &io_merged_str);
525 if (ret < 0) {
526 if (ret == -EOPNOTSUPP)
527 return read_file_fuse("/proc/diskstats", buf, size, d);
528 }
529
530 ret = cgroup_ops->get_io_service_bytes(cgroup_ops, cg, &io_service_bytes_str);
531 if (ret < 0) {
532 if (ret == -EOPNOTSUPP)
533 return read_file_fuse("/proc/diskstats", buf, size, d);
534 }
535
536 ret = cgroup_ops->get_io_wait_time(cgroup_ops, cg, &io_wait_time_str);
537 if (ret < 0) {
538 if (ret == -EOPNOTSUPP)
539 return read_file_fuse("/proc/diskstats", buf, size, d);
540 }
541
542 ret = cgroup_ops->get_io_service_time(cgroup_ops, cg, &io_service_time_str);
543 if (ret < 0) {
544 if (ret == -EOPNOTSUPP)
545 return read_file_fuse("/proc/diskstats", buf, size, d);
546 }
547
757a63e7 548 f = fopen_cached("/proc/diskstats", "re", &fopen_cache);
1f5596dd
CB
549 if (!f)
550 return 0;
551
552 while (getline(&line, &linelen, f) != -1) {
553 ssize_t l;
554 char lbuf[256];
555
998cdfc9 556 i = sscanf(line, "%u %u %71s", &stats.major, &stats.minor, stats.dev_name);
1f5596dd
CB
557 if (i != 3)
558 continue;
559
998cdfc9
CB
560 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Read", &stats.read);
561 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Write", &stats.write);
562 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Discard", &stats.discard);
563
564 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Read", &stats.read_merged);
565 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Write", &stats.write_merged);
566 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Discard", &stats.discard_merged);
567
568 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Read", &stats.read_sectors);
569 stats.read_sectors = stats.read_sectors / 512;
570 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Write", &stats.write_sectors);
571 stats.write_sectors = stats.write_sectors / 512;
572 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Discard", &stats.discard_sectors);
573 stats.discard_sectors = stats.discard_sectors / 512;
574
575 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Read", &read_service_time);
576 read_service_time = read_service_time / 1000000;
577 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Read", &read_wait_time);
578 read_wait_time = read_wait_time / 1000000;
579 stats.read_ticks = read_service_time + read_wait_time;
580
581 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Write", &write_service_time);
582 write_service_time = write_service_time / 1000000;
583 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Write", &write_wait_time);
584 write_wait_time = write_wait_time / 1000000;
585 stats.write_ticks = write_service_time + write_wait_time;
586
587 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Discard", &discard_service_time);
588 discard_service_time = discard_service_time / 1000000;
589 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Discard", &discard_wait_time);
590 discard_wait_time = discard_wait_time / 1000000;
591 stats.discard_ticks = discard_service_time + discard_wait_time;
592
593 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Total", &stats.total_ticks);
594 stats.total_ticks = stats.total_ticks / 1000000;
1f5596dd
CB
595
596 memset(lbuf, 0, 256);
998cdfc9
CB
597 if (stats.read || stats.write || stats.read_merged || stats.write_merged ||
598 stats.read_sectors || stats.write_sectors || stats.read_ticks ||
599 stats.write_ticks || stats.ios_pgr || stats.total_ticks || stats.rq_ticks ||
600 stats.discard_merged || stats.discard_sectors || stats.discard_ticks)
601 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
602 stats.major,
603 stats.minor,
604 stats.dev_name,
605 stats.read,
606 stats.read_merged,
607 stats.read_sectors,
608 stats.read_ticks,
609 stats.write,
610 stats.write_merged,
611 stats.write_sectors,
612 stats.write_ticks,
613 stats.ios_pgr,
614 stats.total_ticks,
615 stats.rq_ticks,
616 stats.discard_merged,
617 stats.discard_sectors,
618 stats.discard_ticks);
1f5596dd
CB
619 else
620 continue;
621
622 l = snprintf(cache, cache_size, "%s", lbuf);
f75d5b75
CB
623 if (l < 0)
624 return log_error(0, "Failed to write cache");
625 if (l >= cache_size)
626 return log_error(0, "Write to cache was truncated");
627
1f5596dd
CB
628 cache += l;
629 cache_size -= l;
630 total_len += l;
631 }
632
633 d->cached = 1;
634 d->size = total_len;
cbfc55fd
CB
635 if (total_len > size)
636 total_len = size;
1f5596dd
CB
637 memcpy(buf, d->buf, total_len);
638
639 return total_len;
640}
641
#if RELOADTEST
/*
 * Leave a marker file behind so that a reload test can tell this code ran.
 * Best effort: the result of mknod() is deliberately ignored.
 */
static inline void iwashere(void)
{
	/* mknod(path, mode, dev): file type and permissions both go into mode. */
	(void)mknod("/tmp/lxcfs-iwashere", S_IFREG | 0644, 0);
}
#endif
648
c9c93749
CB
649/*
650 * This function retrieves the busy time of a group of tasks by looking at
1f5596dd
CB
651 * cpuacct.usage. Unfortunately, this only makes sense when the container has
652 * been given it's own cpuacct cgroup. If not, this function will take the busy
653 * time of all other taks that do not actually belong to the container into
654 * account as well. If someone has a clever solution for this please send a
655 * patch!
656 */
657static double get_reaper_busy(pid_t task)
658{
659 __do_free char *cgroup = NULL, *usage_str = NULL;
e9712933 660 uint64_t usage = 0;
1f5596dd
CB
661 pid_t initpid;
662
663 initpid = lookup_initpid_in_store(task);
664 if (initpid <= 0)
665 return 0;
666
667 cgroup = get_pid_cgroup(initpid, "cpuacct");
668 if (!cgroup)
669 return 0;
670 prune_init_slice(cgroup);
c9c93749 671
e9712933 672 if (!cgroup_ops->get(cgroup_ops, "cpuacct", cgroup, "cpuacct.usage", &usage_str))
1f5596dd
CB
673 return 0;
674
c83158f2
CB
675 if (safe_uint64(usage_str, &usage, 10) < 0)
676 lxcfs_error("Failed to convert usage %s", usage_str);
677
1f5596dd
CB
678 return ((double)usage / 1000000000);
679}
680
681static uint64_t get_reaper_start_time(pid_t pid)
682{
757a63e7 683 __do_free void *fopen_cache = NULL;
12a60884 684 __do_fclose FILE *f = NULL;
1f5596dd 685 int ret;
1f5596dd 686 uint64_t starttime;
c9c93749
CB
687 char path[STRLITERALLEN("/proc/") + LXCFS_NUMSTRLEN64 +
688 STRLITERALLEN("/stat") + 1];
1f5596dd
CB
689 pid_t qpid;
690
691 qpid = lookup_initpid_in_store(pid);
c9c93749
CB
692 if (qpid <= 0)
693 return ret_errno(EINVAL);
1f5596dd 694
c9c93749
CB
695 ret = snprintf(path, sizeof(path), "/proc/%d/stat", qpid);
696 if (ret < 0 || (size_t)ret >= sizeof(path))
697 return ret_errno(EINVAL);
1f5596dd 698
757a63e7 699 f = fopen_cached(path, "re", &fopen_cache);
c9c93749
CB
700 if (!f)
701 return ret_errno(EINVAL);
1f5596dd
CB
702
703 /* Note that the *scanf() argument supression requires that length
704 * modifiers such as "l" are omitted. Otherwise some compilers will yell
705 * at us. It's like telling someone you're not married and then asking
706 * if you can bring your wife to the party.
707 */
708 ret = fscanf(f, "%*d " /* (1) pid %d */
709 "%*s " /* (2) comm %s */
710 "%*c " /* (3) state %c */
711 "%*d " /* (4) ppid %d */
712 "%*d " /* (5) pgrp %d */
713 "%*d " /* (6) session %d */
714 "%*d " /* (7) tty_nr %d */
715 "%*d " /* (8) tpgid %d */
716 "%*u " /* (9) flags %u */
717 "%*u " /* (10) minflt %lu */
718 "%*u " /* (11) cminflt %lu */
719 "%*u " /* (12) majflt %lu */
720 "%*u " /* (13) cmajflt %lu */
721 "%*u " /* (14) utime %lu */
722 "%*u " /* (15) stime %lu */
723 "%*d " /* (16) cutime %ld */
724 "%*d " /* (17) cstime %ld */
725 "%*d " /* (18) priority %ld */
726 "%*d " /* (19) nice %ld */
727 "%*d " /* (20) num_threads %ld */
728 "%*d " /* (21) itrealvalue %ld */
729 "%" PRIu64, /* (22) starttime %llu */
730 &starttime);
12a60884 731 if (ret != 1)
c9c93749 732 return ret_errno(EINVAL);
1f5596dd 733
12a60884 734 return ret_set_errno(starttime, 0);
1f5596dd
CB
735}
736
/*
 * Return the value of get_reaper_start_time(@pid) converted from clock
 * ticks to seconds using sysconf(_SC_CLK_TCK).
 *
 * Returns 0 when the start time is reported as 0 or the tick rate cannot be
 * determined.
 */
static double get_reaper_start_time_in_sec(pid_t pid)
{
	uint64_t start_ticks, hz;
	int64_t tck;

	start_ticks = get_reaper_start_time(pid);
	/* Unsigned value: 0 is the only way it can be "<= 0". */
	if (start_ticks == 0)
		return log_debug(0, "Failed to retrieve start time of pid %d", pid);

	tck = sysconf(_SC_CLK_TCK);
	if (tck < 0)
		return log_debug(0, "Failed to determine number of clock ticks in a second");

	hz = (uint64_t)tck;

	return (double)start_ticks / hz;
}
755
756static double get_reaper_age(pid_t pid)
757{
758 uint64_t uptime_ms;
759 double procstart, procage;
760
c9c93749
CB
761 /*
762 * We need to substract the time the process has started since system
1f5596dd
CB
763 * boot minus the time when the system has started to get the actual
764 * reaper age.
765 */
766 procstart = get_reaper_start_time_in_sec(pid);
767 procage = procstart;
768 if (procstart > 0) {
769 int ret;
770 struct timespec spec;
771
772 ret = clock_gettime(CLOCK_BOOTTIME, &spec);
773 if (ret < 0)
774 return 0;
775
1f5596dd
CB
776 uptime_ms = (spec.tv_sec * 1000) + (spec.tv_nsec * 1e-6);
777 procage = (uptime_ms - (procstart * 1000)) / 1000;
778 }
779
780 return procage;
781}
782
783/*
784 * We read /proc/uptime and reuse its second field.
785 * For the first field, we use the mtime for the reaper for
786 * the calling pid as returned by getreaperage
787 */
788static int proc_uptime_read(char *buf, size_t size, off_t offset,
789 struct fuse_file_info *fi)
790{
791 struct fuse_context *fc = fuse_get_context();
99b183fb 792 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1f5596dd 793 char *cache = d->buf;
d7718002 794 ssize_t total_len = 0, ret = 0;
ea725aba 795 double busytime, idletime, reaperage;
1f5596dd
CB
796
797#if RELOADTEST
798 iwashere();
799#endif
800
f75d5b75
CB
801 if (offset) {
802 int left;
803
1f5596dd
CB
804 if (offset > d->size)
805 return -EINVAL;
f75d5b75 806
d7718002
CB
807 if (!d->cached)
808 return 0;
809
f75d5b75
CB
810 left = d->size - offset;
811 total_len = left > size ? size : left;
1f5596dd 812 memcpy(buf, cache + offset, total_len);
f75d5b75 813
1f5596dd
CB
814 return total_len;
815 }
816
817 reaperage = get_reaper_age(fc->pid);
f75d5b75
CB
818 /*
819 * To understand why this is done, please read the comment to the
1f5596dd
CB
820 * get_reaper_busy() function.
821 */
822 idletime = reaperage;
ea725aba 823 busytime = get_reaper_busy(fc->pid);
1f5596dd
CB
824 if (reaperage >= busytime)
825 idletime = reaperage - busytime;
826
d7718002
CB
827 ret = snprintf(d->buf, d->buflen, "%.2lf %.2lf\n", reaperage, idletime);
828 if (ret < 0 || ret >= d->buflen)
c9c93749 829 return read_file_fuse("/proc/uptime", buf, size, d);
d7718002 830 total_len = ret;
1f5596dd 831
1f5596dd 832 d->cached = 1;
d7718002 833 d->size = total_len;
f75d5b75
CB
834 if (total_len > size)
835 total_len = size;
1f5596dd 836 memcpy(buf, d->buf, total_len);
c9c93749 837
1f5596dd
CB
838 return total_len;
839}
840
841#define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
842static int proc_stat_read(char *buf, size_t size, off_t offset,
843 struct fuse_file_info *fi)
844{
845 __do_free char *cg = NULL, *cpuset = NULL, *line = NULL;
757a63e7 846 __do_free void *fopen_cache = NULL;
1f5596dd
CB
847 __do_free struct cpuacct_usage *cg_cpu_usage = NULL;
848 __do_fclose FILE *f = NULL;
849 struct fuse_context *fc = fuse_get_context();
8044f626 850 struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data;
99b183fb 851 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1f5596dd
CB
852 size_t linelen = 0, total_len = 0;
853 int curcpu = -1; /* cpu numbering starts at 0 */
854 int physcpu = 0;
1ba088ae
CB
855 uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0,
856 softirq = 0, steal = 0, guest = 0, guest_nice = 0;
857 uint64_t user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0,
858 iowait_sum = 0, irq_sum = 0, softirq_sum = 0, steal_sum = 0,
859 guest_sum = 0, guest_nice_sum = 0;
1f5596dd
CB
860 char cpuall[CPUALL_MAX_SIZE];
861 /* reserve for cpu all */
862 char *cache = d->buf + CPUALL_MAX_SIZE;
863 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
864 int cg_cpu_usage_size = 0;
865
7b367480
CB
866 if (offset) {
867 int left;
868
1f5596dd
CB
869 if (offset > d->size)
870 return -EINVAL;
7b367480 871
1f5596dd
CB
872 if (!d->cached)
873 return 0;
7b367480
CB
874
875 left = d->size - offset;
876 total_len = left > size ? size : left;
1f5596dd 877 memcpy(buf, d->buf + offset, total_len);
7b367480 878
1f5596dd
CB
879 return total_len;
880 }
881
882 pid_t initpid = lookup_initpid_in_store(fc->pid);
a9f0d623 883 if (initpid <= 1 || is_shared_pidns(initpid))
1f5596dd
CB
884 initpid = fc->pid;
885
886 /*
887 * when container run with host pid namespace initpid == 1, cgroup will "/"
888 * we should return host os's /proc contents.
889 * in some case cpuacct_usage.all in "/" will larger then /proc/stat
890 */
605e157d 891 if (initpid == 1)
6300e6c6 892 return read_file_fuse("/proc/stat", buf, size, d);
1f5596dd
CB
893
894 cg = get_pid_cgroup(initpid, "cpuset");
1f5596dd
CB
895 if (!cg)
896 return read_file_fuse("/proc/stat", buf, size, d);
897 prune_init_slice(cg);
898
899 cpuset = get_cpuset(cg);
900 if (!cpuset)
901 return 0;
902
77711d7a
CB
903 f = fopen_cached("/proc/stat", "re", &fopen_cache);
904 if (!f)
905 return 0;
906
1494771e
CB
907 /* Skip first system cpu line. */
908 if (getline(&line, &linelen, f) < 0)
909 return log_error(0, "proc_stat_read read first line failed");
910
1f5596dd
CB
911 /*
912 * Read cpuacct.usage_all for all CPUs.
913 * If the cpuacct cgroup is present, it is used to calculate the container's
914 * CPU usage. If not, values from the host's /proc/stat are used.
915 */
f9434b9a
CB
916 if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage, &cg_cpu_usage_size) == 0) {
917 if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) {
918 total_len = cpuview_proc_stat(cg, cpuset, cg_cpu_usage,
919 cg_cpu_usage_size, f,
920 d->buf, d->buflen);
921 goto out;
922 }
923 } else {
924 lxcfs_v("proc_stat_read failed to read from cpuacct, falling back to the host's /proc/stat");
925 }
1f5596dd 926
1f5596dd
CB
927 while (getline(&line, &linelen, f) != -1) {
928 ssize_t l;
929 char cpu_char[10]; /* That's a lot of cores */
930 char *c;
931 uint64_t all_used, cg_used, new_idle;
932 int ret;
933
934 if (strlen(line) == 0)
935 continue;
936 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
937 /* not a ^cpuN line containing a number N, just print it */
938 l = snprintf(cache, cache_size, "%s", line);
f75d5b75
CB
939 if (l < 0)
940 return log_error(0, "Failed to write cache");
941 if (l >= cache_size)
942 return log_error(0, "Write to cache was truncated");
943
1f5596dd
CB
944 cache += l;
945 cache_size -= l;
946 total_len += l;
f75d5b75 947
1f5596dd
CB
948 continue;
949 }
950
951 if (sscanf(cpu_char, "%d", &physcpu) != 1)
952 continue;
f75d5b75 953
1f5596dd
CB
954 if (!cpu_in_cpuset(physcpu, cpuset))
955 continue;
f75d5b75 956
2b8eff1d 957 curcpu++;
1f5596dd
CB
958
959 ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
960 &user,
961 &nice,
962 &system,
963 &idle,
964 &iowait,
965 &irq,
966 &softirq,
967 &steal,
968 &guest,
969 &guest_nice);
1f5596dd
CB
970 if (ret != 10 || !cg_cpu_usage) {
971 c = strchr(line, ' ');
972 if (!c)
973 continue;
1f5596dd 974
f75d5b75
CB
975 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
976 if (l < 0)
977 return log_error(0, "Failed to write cache");
978 if (l >= cache_size)
979 return log_error(0, "Write to cache was truncated");
1f5596dd
CB
980
981 cache += l;
982 cache_size -= l;
983 total_len += l;
984
985 if (ret != 10)
986 continue;
987 }
988
989 if (cg_cpu_usage) {
990 if (physcpu >= cg_cpu_usage_size)
991 break;
992
993 all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
994 cg_used = cg_cpu_usage[physcpu].user + cg_cpu_usage[physcpu].system;
995
996 if (all_used >= cg_used) {
997 new_idle = idle + (all_used - cg_used);
998
999 } else {
2b8eff1d
CB
1000 lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time",
1001 curcpu, cg, all_used, cg_used);
1f5596dd
CB
1002 new_idle = idle;
1003 }
1004
2b8eff1d
CB
1005 l = snprintf(cache, cache_size,
1006 "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
1007 curcpu, cg_cpu_usage[physcpu].user,
1008 cg_cpu_usage[physcpu].system, new_idle);
f75d5b75
CB
1009 if (l < 0)
1010 return log_error(0, "Failed to write cache");
1011 if (l >= cache_size)
1012 return log_error(0, "Write to cache was truncated");
1f5596dd
CB
1013
1014 cache += l;
1015 cache_size -= l;
1016 total_len += l;
1017
1018 user_sum += cg_cpu_usage[physcpu].user;
1019 system_sum += cg_cpu_usage[physcpu].system;
1020 idle_sum += new_idle;
1f5596dd
CB
1021 } else {
1022 user_sum += user;
1023 nice_sum += nice;
1024 system_sum += system;
1025 idle_sum += idle;
1026 iowait_sum += iowait;
1027 irq_sum += irq;
1028 softirq_sum += softirq;
1029 steal_sum += steal;
1030 guest_sum += guest;
1031 guest_nice_sum += guest_nice;
1032 }
1033 }
1034
1035 cache = d->buf;
1036
1037 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
1038 user_sum,
1039 nice_sum,
1040 system_sum,
1041 idle_sum,
1042 iowait_sum,
1043 irq_sum,
1044 softirq_sum,
1045 steal_sum,
1046 guest_sum,
1047 guest_nice_sum);
1048 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE) {
1049 memcpy(cache, cpuall, cpuall_len);
1050 cache += cpuall_len;
1051 } else {
1052 /* shouldn't happen */
f75d5b75 1053 lxcfs_error("proc_stat_read copy cpuall failed, cpuall_len=%d", cpuall_len);
1f5596dd
CB
1054 cpuall_len = 0;
1055 }
1056
1057 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
1058 total_len += cpuall_len;
1059
1060out:
1061 d->cached = 1;
1062 d->size = total_len;
1063 if (total_len > size)
1064 total_len = size;
1065
1066 memcpy(buf, d->buf, total_len);
1067 return total_len;
1068}
1069
1070/* Note that "memory.stat" in cgroup2 is hierarchical by default. */
acff9786 1071static bool cgroup_parse_memory_stat(const char *cgroup, struct memory_stat *mstat)
1f5596dd 1072{
05b7a16d 1073 __do_close int fd = -EBADF;
acff9786
CB
1074 __do_fclose FILE *f = NULL;
1075 __do_free char *line = NULL;
28519477 1076 __do_free void *fdopen_cache = NULL;
acff9786
CB
1077 bool unified;
1078 size_t len = 0;
1079 ssize_t linelen;
1f5596dd 1080
acff9786
CB
1081 fd = cgroup_ops->get_memory_stats_fd(cgroup_ops, cgroup);
1082 if (fd < 0)
1083 return false;
1084
28519477 1085 f = fdopen_cached(fd, "re", &fdopen_cache);
acff9786
CB
1086 if (!f)
1087 return false;
acff9786
CB
1088
1089 unified = pure_unified_layout(cgroup_ops);
1090 while ((linelen = getline(&line, &len, f)) != -1) {
1091 if (!unified && startswith(line, "hierarchical_memory_limit")) {
1092 sscanf(line, "hierarchical_memory_limit %" PRIu64, &(mstat->hierarchical_memory_limit));
1093 } else if (!unified && startswith(line, "hierarchical_memsw_limit")) {
1094 sscanf(line, "hierarchical_memsw_limit %" PRIu64, &(mstat->hierarchical_memsw_limit));
91d63a9e
IM
1095 } else if (startswith(line, unified ? "file" :"total_cache")) {
1096 sscanf(line, unified ? "file %" PRIu64 : "total_cache %" PRIu64, &(mstat->total_cache));
acff9786
CB
1097 } else if (!unified && startswith(line, "total_rss")) {
1098 sscanf(line, "total_rss %" PRIu64, &(mstat->total_rss));
1099 } else if (!unified && startswith(line, "total_rss_huge")) {
1100 sscanf(line, "total_rss_huge %" PRIu64, &(mstat->total_rss_huge));
1101 } else if (startswith(line, unified ? "shmem" : "total_shmem")) {
1102 sscanf(line, unified ? "shmem %" PRIu64 : "total_shmem %" PRIu64, &(mstat->total_shmem));
1103 } else if (startswith(line, unified ? "file_mapped" : "total_mapped_file")) {
1104 sscanf(line, unified ? "file_mapped %" PRIu64 : "total_mapped_file %" PRIu64, &(mstat->total_mapped_file));
1105 } else if (!unified && startswith(line, "total_dirty")) {
1106 sscanf(line, "total_dirty %" PRIu64, &(mstat->total_dirty));
1107 } else if (!unified && startswith(line, "total_writeback")) {
1108 sscanf(line, "total_writeback %" PRIu64, &(mstat->total_writeback));
1109 } else if (!unified && startswith(line, "total_swap")) {
1110 sscanf(line, "total_swap %" PRIu64, &(mstat->total_swap));
1111 } else if (!unified && startswith(line, "total_pgpgin")) {
1112 sscanf(line, "total_pgpgin %" PRIu64, &(mstat->total_pgpgin));
1113 } else if (!unified && startswith(line, "total_pgpgout")) {
1114 sscanf(line, "total_pgpgout %" PRIu64, &(mstat->total_pgpgout));
1115 } else if (startswith(line, unified ? "pgfault" : "total_pgfault")) {
1116 sscanf(line, unified ? "pgfault %" PRIu64 : "total_pgfault %" PRIu64, &(mstat->total_pgfault));
1117 } else if (startswith(line, unified ? "pgmajfault" : "total_pgmajfault")) {
1118 sscanf(line, unified ? "pgmajfault %" PRIu64 : "total_pgmajfault %" PRIu64, &(mstat->total_pgmajfault));
1119 } else if (startswith(line, unified ? "inactive_anon" : "total_inactive_anon")) {
1120 sscanf(line, unified ? "inactive_anon %" PRIu64 : "total_inactive_anon %" PRIu64, &(mstat->total_inactive_anon));
1121 } else if (startswith(line, unified ? "active_anon" : "total_active_anon")) {
1122 sscanf(line, unified ? "active_anon %" PRIu64 : "total_active_anon %" PRIu64, &(mstat->total_active_anon));
1123 } else if (startswith(line, unified ? "inactive_file" : "total_inactive_file")) {
1124 sscanf(line, unified ? "inactive_file %" PRIu64 : "total_inactive_file %" PRIu64, &(mstat->total_inactive_file));
1125 } else if (startswith(line, unified ? "active_file" : "total_active_file")) {
1126 sscanf(line, unified ? "active_file %" PRIu64 : "total_active_file %" PRIu64, &(mstat->total_active_file));
1127 } else if (startswith(line, unified ? "unevictable" : "total_unevictable")) {
1128 sscanf(line, unified ? "unevictable %" PRIu64 : "total_unevictable %" PRIu64, &(mstat->total_unevictable));
1f5596dd 1129 }
1f5596dd 1130 }
acff9786
CB
1131
1132 return true;
1f5596dd
CB
1133}
1134
1f5596dd
CB
1135static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1136 struct fuse_file_info *fi)
1137{
63f35cc0 1138 __do_free char *cgroup = NULL, *line = NULL, *memusage_str = NULL,
b7b018d0 1139 *memswusage_str = NULL;
757a63e7 1140 __do_free void *fopen_cache = NULL;
1f5596dd
CB
1141 __do_fclose FILE *f = NULL;
1142 struct fuse_context *fc = fuse_get_context();
9973cc06 1143 struct lxcfs_opts *opts = (struct lxcfs_opts *)fuse_get_context()->private_data;
c6805016 1144 bool wants_swap = opts && !opts->swap_off && liblxcfs_can_use_swap(), host_swap = false;
99b183fb 1145 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
acff9786 1146 uint64_t memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0,
b7b018d0 1147 hosttotal = 0, swfree = 0, swusage = 0, swtotal = 0;
334a14f9 1148 struct memory_stat mstat = {};
1f5596dd
CB
1149 size_t linelen = 0, total_len = 0;
1150 char *cache = d->buf;
1151 size_t cache_size = d->buflen;
1152 int ret;
1153
1154 if (offset) {
1155 int left;
1156
1157 if (offset > d->size)
1158 return -EINVAL;
1159
1160 if (!d->cached)
1161 return 0;
1162
1163 left = d->size - offset;
1164 total_len = left > size ? size : left;
1165 memcpy(buf, cache + offset, total_len);
1166
1167 return total_len;
1168 }
1169
1170 pid_t initpid = lookup_initpid_in_store(fc->pid);
1171 if (initpid <= 1 || is_shared_pidns(initpid))
1172 initpid = fc->pid;
1173
1174 cgroup = get_pid_cgroup(initpid, "memory");
1175 if (!cgroup)
1176 return read_file_fuse("/proc/meminfo", buf, size, d);
1177
1178 prune_init_slice(cgroup);
1179
b7b018d0 1180 /* memory limits */
1f5596dd
CB
1181 ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str);
1182 if (ret < 0)
b0f33646 1183 return read_file_fuse("/proc/meminfo", buf, size, d);
1f5596dd 1184
b7b018d0
CB
1185 if (safe_uint64(memusage_str, &memusage, 10) < 0)
1186 lxcfs_error("Failed to convert memusage %s", memusage_str);
1187
acff9786 1188 if (!cgroup_parse_memory_stat(cgroup, &mstat))
b0f33646 1189 return read_file_fuse("/proc/meminfo", buf, size, d);
1f5596dd 1190
b7b018d0
CB
1191 memlimit = get_min_memlimit(cgroup, false);
1192
1f5596dd
CB
1193 /*
1194 * Following values are allowed to fail, because swapaccount might be
1195 * turned off for current kernel.
1196 */
2f2080c1 1197 if (wants_swap) {
b7b018d0
CB
1198 memswlimit = get_min_memlimit(cgroup, true);
1199 if (memswlimit > 0) {
2f2080c1 1200 ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str);
b7b018d0
CB
1201 if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) {
1202 if (memlimit > memswlimit)
1203 swtotal = 0;
1204 else
1205 swtotal = (memswlimit - memlimit) / 1024;
1206 if (memusage > memswusage || swtotal == 0)
1207 swusage = 0;
1208 else
1209 swusage = (memswusage - memusage) / 1024;
6bfe1016 1210 }
2f2080c1 1211 }
1f5596dd
CB
1212 }
1213
757a63e7 1214 f = fopen_cached("/proc/meminfo", "re", &fopen_cache);
1f5596dd 1215 if (!f)
b0f33646 1216 return read_file_fuse("/proc/meminfo", buf, size, d);
1f5596dd 1217
b7b018d0
CB
1218 memusage /= 1024;
1219 memlimit /= 1024;
1f5596dd
CB
1220 while (getline(&line, &linelen, f) != -1) {
1221 ssize_t l;
1222 char *printme, lbuf[100];
1223
1224 memset(lbuf, 0, 100);
1225 if (startswith(line, "MemTotal:")) {
acff9786 1226 sscanf(line+sizeof("MemTotal:")-1, "%" PRIu64, &hosttotal);
114eb8b8
CB
1227 if (memlimit == 0)
1228 memlimit = hosttotal;
1229
1f5596dd
CB
1230 if (hosttotal < memlimit)
1231 memlimit = hosttotal;
acff9786 1232 snprintf(lbuf, 100, "MemTotal: %8" PRIu64 " kB\n", memlimit);
1f5596dd
CB
1233 printme = lbuf;
1234 } else if (startswith(line, "MemFree:")) {
acff9786 1235 snprintf(lbuf, 100, "MemFree: %8" PRIu64 " kB\n", memlimit - memusage);
1f5596dd
CB
1236 printme = lbuf;
1237 } else if (startswith(line, "MemAvailable:")) {
acff9786 1238 snprintf(lbuf, 100, "MemAvailable: %8" PRIu64 " kB\n", memlimit - memusage + mstat.total_cache / 1024);
1f5596dd 1239 printme = lbuf;
07c90197 1240 } else if (startswith(line, "SwapTotal:")) {
b7b018d0
CB
1241 if (wants_swap) {
1242 uint64_t hostswtotal = 0;
1243
1244 sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal);
1245
6da2a084
CB
1246 /*
1247 * If swtotal is 0 it should mean that
1248 * memory.memsw.limit_in_bytes and
1249 * memory.limit_in_bytes are both unlimited or
1250 * both set to the same value. In both cases we
1251 * have no idea what the technical swap limit
1252 * is supposed to be (It's a shared limit
1253 * anyway.) so fallback to the host's values in
1254 * that case too.
1255 */
1256 if ((hostswtotal < swtotal) || swtotal == 0) {
b7b018d0
CB
1257 swtotal = hostswtotal;
1258 host_swap = true;
1259 }
1260 }
1261
6bfe1016 1262 snprintf(lbuf, 100, "SwapTotal: %8" PRIu64 " kB\n", swtotal);
1f5596dd 1263 printme = lbuf;
07c90197 1264 } else if (startswith(line, "SwapFree:")) {
b7b018d0
CB
1265 if (wants_swap) {
1266 uint64_t hostswfree = 0;
6bfe1016 1267
b7b018d0
CB
1268 if (host_swap) {
1269 sscanf(line + STRLITERALLEN("SwapFree:"), "%" PRIu64, &hostswfree);
1270 swfree = hostswfree;
1271 } else if (swtotal >= swusage) {
1272 swfree = swtotal - swusage;
1273 }
07c90197 1274 }
b7b018d0 1275
6bfe1016 1276 snprintf(lbuf, 100, "SwapFree: %8" PRIu64 " kB\n", swfree);
1f5596dd
CB
1277 printme = lbuf;
1278 } else if (startswith(line, "Slab:")) {
acff9786 1279 snprintf(lbuf, 100, "Slab: %8" PRIu64 " kB\n", (uint64_t)0);
1f5596dd
CB
1280 printme = lbuf;
1281 } else if (startswith(line, "Buffers:")) {
acff9786 1282 snprintf(lbuf, 100, "Buffers: %8" PRIu64 " kB\n", (uint64_t)0);
1f5596dd
CB
1283 printme = lbuf;
1284 } else if (startswith(line, "Cached:")) {
acff9786
CB
1285 snprintf(lbuf, 100, "Cached: %8" PRIu64 " kB\n",
1286 mstat.total_cache / 1024);
1f5596dd
CB
1287 printme = lbuf;
1288 } else if (startswith(line, "SwapCached:")) {
acff9786 1289 snprintf(lbuf, 100, "SwapCached: %8" PRIu64 " kB\n", (uint64_t)0);
1f5596dd
CB
1290 printme = lbuf;
1291 } else if (startswith(line, "Active:")) {
acff9786
CB
1292 snprintf(lbuf, 100, "Active: %8" PRIu64 " kB\n",
1293 (mstat.total_active_anon +
1294 mstat.total_active_file) /
1295 1024);
1f5596dd
CB
1296 printme = lbuf;
1297 } else if (startswith(line, "Inactive:")) {
acff9786
CB
1298 snprintf(lbuf, 100, "Inactive: %8" PRIu64 " kB\n",
1299 (mstat.total_inactive_anon +
1300 mstat.total_inactive_file) /
1301 1024);
1f5596dd 1302 printme = lbuf;
659b0278 1303 } else if (startswith(line, "Active(anon):")) {
acff9786
CB
1304 snprintf(lbuf, 100, "Active(anon): %8" PRIu64 " kB\n",
1305 mstat.total_active_anon / 1024);
1f5596dd 1306 printme = lbuf;
659b0278 1307 } else if (startswith(line, "Inactive(anon):")) {
acff9786
CB
1308 snprintf(lbuf, 100, "Inactive(anon): %8" PRIu64 " kB\n",
1309 mstat.total_inactive_anon / 1024);
1f5596dd 1310 printme = lbuf;
659b0278 1311 } else if (startswith(line, "Active(file):")) {
acff9786
CB
1312 snprintf(lbuf, 100, "Active(file): %8" PRIu64 " kB\n",
1313 mstat.total_active_file / 1024);
1f5596dd 1314 printme = lbuf;
659b0278 1315 } else if (startswith(line, "Inactive(file):")) {
acff9786
CB
1316 snprintf(lbuf, 100, "Inactive(file): %8" PRIu64 " kB\n",
1317 mstat.total_inactive_file / 1024);
1f5596dd 1318 printme = lbuf;
659b0278 1319 } else if (startswith(line, "Unevictable:")) {
acff9786
CB
1320 snprintf(lbuf, 100, "Unevictable: %8" PRIu64 " kB\n",
1321 mstat.total_unevictable / 1024);
1322 printme = lbuf;
659b0278 1323 } else if (startswith(line, "Dirty:")) {
acff9786
CB
1324 snprintf(lbuf, 100, "Dirty: %8" PRIu64 " kB\n",
1325 mstat.total_dirty / 1024);
1326 printme = lbuf;
659b0278 1327 } else if (startswith(line, "Writeback:")) {
acff9786
CB
1328 snprintf(lbuf, 100, "Writeback: %8" PRIu64 " kB\n",
1329 mstat.total_writeback / 1024);
1330 printme = lbuf;
659b0278 1331 } else if (startswith(line, "AnonPages:")) {
acff9786
CB
1332 snprintf(lbuf, 100, "AnonPages: %8" PRIu64 " kB\n",
1333 (mstat.total_active_anon +
1334 mstat.total_inactive_anon - mstat.total_shmem) /
1335 1024);
1336 printme = lbuf;
659b0278 1337 } else if (startswith(line, "Mapped:")) {
acff9786
CB
1338 snprintf(lbuf, 100, "Mapped: %8" PRIu64 " kB\n",
1339 mstat.total_mapped_file / 1024);
1f5596dd 1340 printme = lbuf;
659b0278 1341 } else if (startswith(line, "SReclaimable:")) {
acff9786 1342 snprintf(lbuf, 100, "SReclaimable: %8" PRIu64 " kB\n", (uint64_t)0);
1f5596dd 1343 printme = lbuf;
659b0278 1344 } else if (startswith(line, "SUnreclaim:")) {
acff9786 1345 snprintf(lbuf, 100, "SUnreclaim: %8" PRIu64 " kB\n", (uint64_t)0);
1f5596dd
CB
1346 printme = lbuf;
1347 } else if (startswith(line, "Shmem:")) {
acff9786
CB
1348 snprintf(lbuf, 100, "Shmem: %8" PRIu64 " kB\n",
1349 mstat.total_shmem / 1024);
1f5596dd 1350 printme = lbuf;
659b0278 1351 } else if (startswith(line, "ShmemHugePages:")) {
acff9786 1352 snprintf(lbuf, 100, "ShmemHugePages: %8" PRIu64 " kB\n", (uint64_t)0);
1f5596dd 1353 printme = lbuf;
659b0278 1354 } else if (startswith(line, "ShmemPmdMapped:")) {
acff9786
CB
1355 snprintf(lbuf, 100, "ShmemPmdMapped: %8" PRIu64 " kB\n", (uint64_t)0);
1356 printme = lbuf;
659b0278 1357 } else if (startswith(line, "AnonHugePages:")) {
acff9786
CB
1358 snprintf(lbuf, 100, "AnonHugePages: %8" PRIu64 " kB\n",
1359 mstat.total_rss_huge / 1024);
1f5596dd 1360 printme = lbuf;
acff9786
CB
1361 } else {
1362 printme = line;
1363 }
1f5596dd
CB
1364
1365 l = snprintf(cache, cache_size, "%s", printme);
f75d5b75
CB
1366 if (l < 0)
1367 return log_error(0, "Failed to write cache");
1368 if (l >= cache_size)
1369 return log_error(0, "Write to cache was truncated");
1f5596dd
CB
1370
1371 cache += l;
1372 cache_size -= l;
1373 total_len += l;
1374 }
1375
1376 d->cached = 1;
1377 d->size = total_len;
f75d5b75
CB
1378 if (total_len > size)
1379 total_len = size;
1f5596dd
CB
1380 memcpy(buf, d->buf, total_len);
1381
1382 return total_len;
1383}
1384
2d7bcab7
CB
1385__lxcfs_fuse_ops int proc_read(const char *path, char *buf, size_t size,
1386 off_t offset, struct fuse_file_info *fi)
1f5596dd 1387{
99b183fb 1388 struct file_info *f = INTTYPE_TO_PTR(fi->fh);
1f5596dd
CB
1389
1390 switch (f->type) {
1391 case LXC_TYPE_PROC_MEMINFO:
cbfc55fd
CB
1392 if (liblxcfs_functional())
1393 return proc_meminfo_read(buf, size, offset, fi);
1394
1395 return read_file_fuse_with_offset(LXC_TYPE_PROC_MEMINFO_PATH,
1396 buf, size, offset, f);
1f5596dd 1397 case LXC_TYPE_PROC_CPUINFO:
cbfc55fd
CB
1398 if (liblxcfs_functional())
1399 return proc_cpuinfo_read(buf, size, offset, fi);
1400
1401 return read_file_fuse_with_offset(LXC_TYPE_PROC_CPUINFO_PATH,
1402 buf, size, offset, f);
1f5596dd 1403 case LXC_TYPE_PROC_UPTIME:
cbfc55fd
CB
1404 if (liblxcfs_functional())
1405 return proc_uptime_read(buf, size, offset, fi);
1406
1407 return read_file_fuse_with_offset(LXC_TYPE_PROC_UPTIME_PATH,
1408 buf, size, offset, f);
1f5596dd 1409 case LXC_TYPE_PROC_STAT:
cbfc55fd
CB
1410 if (liblxcfs_functional())
1411 return proc_stat_read(buf, size, offset, fi);
1412
1413 return read_file_fuse_with_offset(LXC_TYPE_PROC_STAT_PATH, buf,
1414 size, offset, f);
1f5596dd 1415 case LXC_TYPE_PROC_DISKSTATS:
cbfc55fd
CB
1416 if (liblxcfs_functional())
1417 return proc_diskstats_read(buf, size, offset, fi);
1418
1419 return read_file_fuse_with_offset(LXC_TYPE_PROC_DISKSTATS_PATH,
1420 buf, size, offset, f);
1f5596dd 1421 case LXC_TYPE_PROC_SWAPS:
cbfc55fd
CB
1422 if (liblxcfs_functional())
1423 return proc_swaps_read(buf, size, offset, fi);
1424
1425 return read_file_fuse_with_offset(LXC_TYPE_PROC_SWAPS_PATH, buf,
1426 size, offset, f);
1f5596dd 1427 case LXC_TYPE_PROC_LOADAVG:
cbfc55fd
CB
1428 if (liblxcfs_functional())
1429 return proc_loadavg_read(buf, size, offset, fi);
1430
1431 return read_file_fuse_with_offset(LXC_TYPE_PROC_LOADAVG_PATH,
1432 buf, size, offset, f);
1f5596dd 1433 }
99b183fb
CB
1434
1435 return -EINVAL;
1f5596dd 1436}