]> git.proxmox.com Git - mirror_lxcfs.git/blob - src/proc_fuse.c
Merge pull request #474 from brauner/2021-09-01.meson
[mirror_lxcfs.git] / src / proc_fuse.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6
7 #include "config.h"
8
9 #define __STDC_FORMAT_MACROS
10 #include <dirent.h>
11 #include <errno.h>
12 #include <fcntl.h>
13 #include <inttypes.h>
14 #include <libgen.h>
15 #include <pthread.h>
16 #include <sched.h>
17 #include <stdarg.h>
18 #include <stdbool.h>
19 #include <stdint.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <time.h>
24 #include <unistd.h>
25 #include <wait.h>
26 #include <linux/magic.h>
27 #include <linux/sched.h>
28 #include <sys/epoll.h>
29 #include <sys/mman.h>
30 #include <sys/mount.h>
31 #include <sys/param.h>
32 #include <sys/socket.h>
33 #include <sys/syscall.h>
34 #include <sys/sysinfo.h>
35 #include <sys/vfs.h>
36
37 #include "proc_fuse.h"
38
39 #include "bindings.h"
40 #include "cgroup_fuse.h"
41 #include "cgroups/cgroup.h"
42 #include "cgroups/cgroup_utils.h"
43 #include "cpuset_parse.h"
44 #include "lxcfs_fuse_compat.h"
45 #include "memory_utils.h"
46 #include "proc_loadavg.h"
47 #include "proc_cpuview.h"
48 #include "utils.h"
49
/*
 * Counters collected from a cgroup's memory statistics file.
 *
 * The field names mirror the "hierarchical_*" and "total_*" keys that
 * cgroup_parse_memory_stat() looks for in legacy (cgroup v1) layouts; on a
 * pure unified layout the same fields are filled from the corresponding
 * cgroup2 keys (e.g. "file", "shmem", "file_mapped").
 *
 * NOTE(review): units are whatever the kernel reports in that file
 * (presumably bytes for sizes and plain counts for events) - confirm against
 * the consumers before relying on it.
 */
struct memory_stat {
	/* limits */
	uint64_t hierarchical_memory_limit;
	uint64_t hierarchical_memsw_limit;

	/* usage breakdown */
	uint64_t total_cache;
	uint64_t total_rss;
	uint64_t total_rss_huge;
	uint64_t total_shmem;
	uint64_t total_mapped_file;
	uint64_t total_dirty;
	uint64_t total_writeback;
	uint64_t total_swap;

	/* paging event counters */
	uint64_t total_pgpgin;
	uint64_t total_pgpgout;
	uint64_t total_pgfault;
	uint64_t total_pgmajfault;

	/* LRU list sizes */
	uint64_t total_inactive_anon;
	uint64_t total_active_anon;
	uint64_t total_inactive_file;
	uint64_t total_active_file;
	uint64_t total_unevictable;
};
71
72 static off_t get_procfile_size(const char *path)
73 {
74 __do_fclose FILE *f = NULL;
75 __do_free char *line = NULL;
76 size_t len = 0;
77 ssize_t sz, answer = 0;
78
79 f = fopen(path, "re");
80 if (!f)
81 return 0;
82
83 while ((sz = getline(&line, &len, f)) != -1)
84 answer += sz;
85
86 return answer;
87 }
88
89 __lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb)
90 {
91 struct timespec now;
92
93 memset(sb, 0, sizeof(struct stat));
94 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
95 return -EINVAL;
96
97 sb->st_uid = sb->st_gid = 0;
98 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
99 if (strcmp(path, "/proc") == 0) {
100 sb->st_mode = S_IFDIR | 00555;
101 sb->st_nlink = 2;
102 return 0;
103 }
104
105 if (strcmp(path, "/proc/meminfo") == 0 ||
106 strcmp(path, "/proc/cpuinfo") == 0 ||
107 strcmp(path, "/proc/uptime") == 0 ||
108 strcmp(path, "/proc/stat") == 0 ||
109 strcmp(path, "/proc/diskstats") == 0 ||
110 strcmp(path, "/proc/swaps") == 0 ||
111 strcmp(path, "/proc/loadavg") == 0 ||
112 strcmp(path, "/proc/slabinfo") == 0) {
113 sb->st_size = get_procfile_size(path);
114 sb->st_mode = S_IFREG | 00444;
115 sb->st_nlink = 1;
116 return 0;
117 }
118
119 return -ENOENT;
120 }
121
122 __lxcfs_fuse_ops int proc_readdir(const char *path, void *buf,
123 fuse_fill_dir_t filler, off_t offset,
124 struct fuse_file_info *fi)
125 {
126 if (DIR_FILLER(filler, buf, ".", NULL, 0) != 0 ||
127 DIR_FILLER(filler, buf, "..", NULL, 0) != 0 ||
128 DIR_FILLER(filler, buf, "cpuinfo", NULL, 0) != 0 ||
129 DIR_FILLER(filler, buf, "meminfo", NULL, 0) != 0 ||
130 DIR_FILLER(filler, buf, "stat", NULL, 0) != 0 ||
131 DIR_FILLER(filler, buf, "uptime", NULL, 0) != 0 ||
132 DIR_FILLER(filler, buf, "diskstats", NULL, 0) != 0 ||
133 DIR_FILLER(filler, buf, "swaps", NULL, 0) != 0 ||
134 DIR_FILLER(filler, buf, "loadavg", NULL, 0) != 0 ||
135 DIR_FILLER(filler, buf, "slabinfo", NULL, 0) != 0)
136 return -EINVAL;
137
138 return 0;
139 }
140
141 __lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi)
142 {
143 __do_free struct file_info *info = NULL;
144 int type = -1;
145
146 if (strcmp(path, "/proc/meminfo") == 0)
147 type = LXC_TYPE_PROC_MEMINFO;
148 else if (strcmp(path, "/proc/cpuinfo") == 0)
149 type = LXC_TYPE_PROC_CPUINFO;
150 else if (strcmp(path, "/proc/uptime") == 0)
151 type = LXC_TYPE_PROC_UPTIME;
152 else if (strcmp(path, "/proc/stat") == 0)
153 type = LXC_TYPE_PROC_STAT;
154 else if (strcmp(path, "/proc/diskstats") == 0)
155 type = LXC_TYPE_PROC_DISKSTATS;
156 else if (strcmp(path, "/proc/swaps") == 0)
157 type = LXC_TYPE_PROC_SWAPS;
158 else if (strcmp(path, "/proc/loadavg") == 0)
159 type = LXC_TYPE_PROC_LOADAVG;
160 else if (strcmp(path, "/proc/slabinfo") == 0)
161 type = LXC_TYPE_PROC_SLABINFO;
162 if (type == -1)
163 return -ENOENT;
164
165 info = zalloc(sizeof(*info));
166 if (!info)
167 return -ENOMEM;
168
169 info->type = type;
170
171 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
172
173 info->buf = zalloc(info->buflen);
174 if (!info->buf)
175 return -ENOMEM;
176 /* set actual size to buffer size */
177 info->size = info->buflen;
178
179 fi->fh = PTR_TO_UINT64(move_ptr(info));
180 return 0;
181 }
182
183 __lxcfs_fuse_ops int proc_access(const char *path, int mask)
184 {
185 if (strcmp(path, "/proc") == 0 && access(path, R_OK) == 0)
186 return 0;
187
188 /* these are all read-only */
189 if ((mask & ~R_OK) != 0)
190 return -EACCES;
191
192 return 0;
193 }
194
195 __lxcfs_fuse_ops int proc_release(const char *path, struct fuse_file_info *fi)
196 {
197 do_release_file_info(fi);
198 return 0;
199 }
200
201 static uint64_t get_memlimit(const char *cgroup, bool swap)
202 {
203 __do_free char *memlimit_str = NULL;
204 uint64_t memlimit = 0;
205 int ret;
206
207 if (swap)
208 ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cgroup, &memlimit_str);
209 else
210 ret = cgroup_ops->get_memory_max(cgroup_ops, cgroup, &memlimit_str);
211 if (ret > 0 && memlimit_str[0] && safe_uint64(memlimit_str, &memlimit, 10) < 0)
212 lxcfs_error("Failed to convert memlimit %s", memlimit_str);
213
214 return memlimit;
215 }
216
/* Step @pos backwards over a run of '/' characters, never going past @start. */
static char *rewind_slashes(char *start, char *pos)
{
	while (pos != start && pos[-1] == '/')
		pos--;

	return pos;
}

/*
 * dirname() with glibc-2.32 semantics, kept locally because POSIX
 * dirname("/some-dir") may return "/some-dir" rather than "/", which breaks
 * get_min_memlimit().
 *
 * @path is modified in place: it is truncated to its directory part and
 * returned. When @path is NULL or contains no directory part, a pointer to a
 * static "." string is returned instead and @path is left untouched; that
 * string must not be written to.
 */
static char *gnu_dirname(char *path)
{
	static const char dot[] = ".";
	char *slash, *cut;

	if (path == NULL)
		return (char *)dot;

	slash = strrchr(path, '/');

	/*
	 * The last '/' ends the string (and is not the leading one): skip the
	 * whole trailing run of slashes and search again in front of it.
	 */
	if (slash != NULL && slash != path && slash[1] == '\0') {
		cut = rewind_slashes(path, slash);
		if (cut != path)
			slash = memrchr(path, '/', cut - path);
	}

	/*
	 * No directory part at all. XPG requires "." here, hence the static
	 * string and the cast.
	 */
	if (slash == NULL)
		return (char *)dot;

	/* Drop every slash that separates the directory from the last component. */
	cut = rewind_slashes(path, slash);
	if (cut == path) {
		/*
		 * Only slashes precede the last component, so the result is "/".
		 * Exactly two leading slashes are special and yield "//" (see
		 * XBD 4.10 Path Name Resolution).
		 */
		cut = (slash == path + 1) ? slash + 1 : path + 1;
	}

	*cut = '\0';
	return path;
}
278
279 static uint64_t get_min_memlimit(const char *cgroup, bool swap)
280 {
281 __do_free char *copy = NULL;
282 uint64_t memlimit = 0, retlimit = 0;
283
284 copy = strdup(cgroup);
285 if (!copy)
286 return log_error_errno(0, ENOMEM, "Failed to allocate memory");
287
288 retlimit = get_memlimit(copy, swap);
289
290 /*
291 * If the cgroup doesn't start with / (probably won't happen), dirname()
292 * will terminate with "" instead of "/"
293 */
294 while (*copy && strcmp(copy, "/") != 0) {
295 char *it = copy;
296
297 it = gnu_dirname(it);
298 memlimit = get_memlimit(it, swap);
299 if (memlimit > 0 && memlimit < retlimit)
300 retlimit = memlimit;
301 };
302
303 return retlimit;
304 }
305
/* Return true if @line begins with the string @pref (an empty @pref always matches). */
static inline bool startswith(const char *line, const char *pref)
{
	size_t pref_len = strlen(pref);

	return !strncmp(line, pref, pref_len);
}
310
311 static int proc_swaps_read(char *buf, size_t size, off_t offset,
312 struct fuse_file_info *fi)
313 {
314 __do_free char *cgroup = NULL, *memusage_str = NULL,
315 *memswusage_str = NULL, *memswpriority_str = NULL;
316 struct fuse_context *fc = fuse_get_context();
317 bool wants_swap = lxcfs_has_opt(fuse_get_context()->private_data, LXCFS_SWAP_ON);
318 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
319 uint64_t memswlimit = 0, memlimit = 0, memusage = 0, memswusage = 0,
320 swtotal = 0, swusage = 0, memswpriority = 1,
321 hostswtotal = 0, hostswfree = 0;
322 ssize_t total_len = 0;
323 ssize_t l = 0;
324 char *cache = d->buf;
325 int ret;
326 __do_free char *line = NULL;
327 __do_free void *fopen_cache = NULL;
328 __do_fclose FILE *f = NULL;
329 size_t linelen = 0;
330
331 if (offset) {
332 size_t left;
333
334 if (offset > d->size)
335 return -EINVAL;
336
337 if (!d->cached)
338 return 0;
339
340 left = d->size - offset;
341 total_len = left > size ? size: left;
342 memcpy(buf, cache + offset, total_len);
343
344 return total_len;
345 }
346
347 pid_t initpid = lookup_initpid_in_store(fc->pid);
348 if (initpid <= 1 || is_shared_pidns(initpid))
349 initpid = fc->pid;
350
351 cgroup = get_pid_cgroup(initpid, "memory");
352 if (!cgroup)
353 return read_file_fuse("/proc/swaps", buf, size, d);
354 prune_init_slice(cgroup);
355
356 memlimit = get_min_memlimit(cgroup, false);
357
358 ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str);
359 if (ret < 0)
360 return 0;
361
362 if (safe_uint64(memusage_str, &memusage, 10) < 0)
363 lxcfs_error("Failed to convert memusage %s", memusage_str);
364
365 if (wants_swap) {
366 memswlimit = get_min_memlimit(cgroup, true);
367 if (memswlimit > 0) {
368 ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str);
369 if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) {
370 if (memlimit > memswlimit)
371 swtotal = 0;
372 else
373 swtotal = (memswlimit - memlimit) / 1024;
374 if (memusage > memswusage || swtotal == 0)
375 swusage = 0;
376 else
377 swusage = (memswusage - memusage) / 1024;
378 }
379
380 ret = cgroup_ops->get_memory_swappiness(cgroup_ops, cgroup, &memswpriority_str);
381 if (ret >= 0)
382 safe_uint64(memswpriority_str, &memswpriority, 10);
383 }
384 }
385
386 total_len = snprintf(d->buf, d->size, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
387
388 /* Read host total and free values */
389 f = fopen_cached("/proc/meminfo", "re", &fopen_cache);
390 if (!f)
391 return 0;
392
393 while (getline(&line, &linelen, f) != -1) {
394 if (startswith(line, "SwapTotal:"))
395 sscanf(line, "SwapTotal: %8" PRIu64 " kB", &hostswtotal);
396 else if (startswith(line, "SwapFree:"))
397 sscanf(line, "SwapFree: %8" PRIu64 " kB", &hostswfree);
398 }
399
400 if (wants_swap) {
401 /* The total amount of swap is always reported to be the
402 lesser of the RAM+SWAP limit or the SWAP device size.
403 This is because the kernel can swap as much as it
404 wants and not only up to swtotal. */
405 swtotal = memlimit / 1024 + swtotal;
406 if (hostswtotal < swtotal) {
407 swtotal = hostswtotal;
408 }
409
410 /* When swappiness is 0, pretend we can't swap. */
411 if (memswpriority == 0) {
412 swtotal = swusage;
413 }
414 }
415
416 if (swtotal > 0) {
417 l = snprintf(d->buf + total_len, d->size - total_len,
418 "none%*svirtual\t\t%" PRIu64 "\t%" PRIu64 "\t0\n",
419 36, " ", swtotal, swusage);
420 total_len += l;
421 }
422
423 if (total_len < 0 || l < 0)
424 return log_error(0, "Failed writing to cache");
425
426 d->cached = 1;
427 d->size = (int)total_len;
428
429 if ((size_t)total_len > size)
430 total_len = size;
431 memcpy(buf, d->buf, total_len);
432
433 return total_len;
434 }
435
/*
 * Look up one counter in the text of a blkio statistics file.
 *
 * @str:    file content made of lines of the form "<major>:<minor> <iotype> <value>";
 *          may be NULL, which is treated like empty content
 * @major:  device major number to look for
 * @minor:  device minor number to look for
 * @iotype: operation name to look for, e.g. "Read", "Write", "Discard", "Total"
 * @v:      receives the value of the first matching line, or 0 if no line
 *          matches or the value cannot be parsed
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor,
			       char *iotype, uint64_t *v)
{
	char key[32];
	size_t len;
	int ret;

	*v = 0;

	/* The caller passes NULL when the statistics file could not be read. */
	if (!str)
		return;

	ret = snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	if (ret < 0 || (size_t)ret >= sizeof(key))
		return; /* a truncated key could match the wrong line */
	len = (size_t)ret;

	while (*str) {
		if (strncmp(str, key, len) == 0) {
			/* SCNu64 matches uint64_t on every ABI; "%lu" does not. */
			sscanf(str + len, "%" SCNu64, v);
			return;
		}

		/* Advance to the start of the next line, if there is one. */
		str = strchr(str, '\n');
		if (!str)
			return;
		str++;
	}
}
459
/*
 * One line of /proc/diskstats. The number in each comment is the position of
 * the field within the line.
 */
struct lxcfs_diskstats {
	/* device identification */
	unsigned int major;		/*  1 - major number */
	unsigned int minor;		/*  2 - minor number */
	char dev_name[72];		/*  3 - device name */

	/* reads */
	uint64_t read;			/*  4 - reads completed successfully */
	uint64_t read_merged;		/*  5 - reads merged */
	uint64_t read_sectors;		/*  6 - sectors read */
	uint64_t read_ticks;		/*  7 - time spent reading (ms) */

	/* writes */
	uint64_t write;			/*  8 - writes completed */
	uint64_t write_merged;		/*  9 - writes merged */
	uint64_t write_sectors;		/* 10 - sectors written */
	uint64_t write_ticks;		/* 11 - time spent writing (ms) */

	/* queue */
	uint64_t ios_pgr;		/* 12 - I/Os currently in progress */
	uint64_t total_ticks;		/* 13 - time spent doing I/Os (ms) */
	uint64_t rq_ticks;		/* 14 - weighted time spent doing I/Os (ms) */

	/* discards (kernel 4.18+) */
	uint64_t discard;		/* 15 - discards completed successfully */
	uint64_t discard_merged;	/* 16 - discards merged */
	uint64_t discard_sectors;	/* 17 - sectors discarded */
	uint64_t discard_ticks;		/* 18 - time spent discarding */
};
480
481 static int proc_diskstats_read(char *buf, size_t size, off_t offset,
482 struct fuse_file_info *fi)
483 {
484 __do_free char *cg = NULL, *io_serviced_str = NULL,
485 *io_merged_str = NULL, *io_service_bytes_str = NULL,
486 *io_wait_time_str = NULL, *io_service_time_str = NULL,
487 *line = NULL;
488 __do_free void *fopen_cache = NULL;
489 __do_fclose FILE *f = NULL;
490 struct fuse_context *fc = fuse_get_context();
491 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
492 struct lxcfs_diskstats stats = {};
493 /* helper fields */
494 uint64_t read_service_time, write_service_time, discard_service_time, read_wait_time,
495 write_wait_time, discard_wait_time;
496 char *cache = d->buf;
497 size_t cache_size = d->buflen;
498 size_t linelen = 0, total_len = 0;
499 int i = 0;
500 int ret;
501
502 if (offset) {
503 size_t left;
504
505 if (offset > d->size)
506 return -EINVAL;
507
508 if (!d->cached)
509 return 0;
510
511 left = d->size - offset;
512 total_len = left > size ? size: left;
513 memcpy(buf, cache + offset, total_len);
514
515 return total_len;
516 }
517
518 pid_t initpid = lookup_initpid_in_store(fc->pid);
519 if (initpid <= 1 || is_shared_pidns(initpid))
520 initpid = fc->pid;
521
522 cg = get_pid_cgroup(initpid, "blkio");
523 if (!cg)
524 return read_file_fuse("/proc/diskstats", buf, size, d);
525 prune_init_slice(cg);
526
527 ret = cgroup_ops->get_io_serviced(cgroup_ops, cg, &io_serviced_str);
528 if (ret < 0) {
529 if (ret == -EOPNOTSUPP)
530 return read_file_fuse("/proc/diskstats", buf, size, d);
531 }
532
533 ret = cgroup_ops->get_io_merged(cgroup_ops, cg, &io_merged_str);
534 if (ret < 0) {
535 if (ret == -EOPNOTSUPP)
536 return read_file_fuse("/proc/diskstats", buf, size, d);
537 }
538
539 ret = cgroup_ops->get_io_service_bytes(cgroup_ops, cg, &io_service_bytes_str);
540 if (ret < 0) {
541 if (ret == -EOPNOTSUPP)
542 return read_file_fuse("/proc/diskstats", buf, size, d);
543 }
544
545 ret = cgroup_ops->get_io_wait_time(cgroup_ops, cg, &io_wait_time_str);
546 if (ret < 0) {
547 if (ret == -EOPNOTSUPP)
548 return read_file_fuse("/proc/diskstats", buf, size, d);
549 }
550
551 ret = cgroup_ops->get_io_service_time(cgroup_ops, cg, &io_service_time_str);
552 if (ret < 0) {
553 if (ret == -EOPNOTSUPP)
554 return read_file_fuse("/proc/diskstats", buf, size, d);
555 }
556
557 f = fopen_cached("/proc/diskstats", "re", &fopen_cache);
558 if (!f)
559 return 0;
560
561 while (getline(&line, &linelen, f) != -1) {
562 ssize_t l;
563 char lbuf[256];
564
565 i = sscanf(line, "%u %u %71s", &stats.major, &stats.minor, stats.dev_name);
566 if (i != 3)
567 continue;
568
569 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Read", &stats.read);
570 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Write", &stats.write);
571 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Discard", &stats.discard);
572
573 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Read", &stats.read_merged);
574 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Write", &stats.write_merged);
575 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Discard", &stats.discard_merged);
576
577 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Read", &stats.read_sectors);
578 stats.read_sectors = stats.read_sectors / 512;
579 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Write", &stats.write_sectors);
580 stats.write_sectors = stats.write_sectors / 512;
581 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Discard", &stats.discard_sectors);
582 stats.discard_sectors = stats.discard_sectors / 512;
583
584 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Read", &read_service_time);
585 read_service_time = read_service_time / 1000000;
586 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Read", &read_wait_time);
587 read_wait_time = read_wait_time / 1000000;
588 stats.read_ticks = read_service_time + read_wait_time;
589
590 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Write", &write_service_time);
591 write_service_time = write_service_time / 1000000;
592 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Write", &write_wait_time);
593 write_wait_time = write_wait_time / 1000000;
594 stats.write_ticks = write_service_time + write_wait_time;
595
596 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Discard", &discard_service_time);
597 discard_service_time = discard_service_time / 1000000;
598 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Discard", &discard_wait_time);
599 discard_wait_time = discard_wait_time / 1000000;
600 stats.discard_ticks = discard_service_time + discard_wait_time;
601
602 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Total", &stats.total_ticks);
603 stats.total_ticks = stats.total_ticks / 1000000;
604
605 memset(lbuf, 0, 256);
606 if (stats.read || stats.write || stats.read_merged || stats.write_merged ||
607 stats.read_sectors || stats.write_sectors || stats.read_ticks ||
608 stats.write_ticks || stats.ios_pgr || stats.total_ticks || stats.rq_ticks ||
609 stats.discard_merged || stats.discard_sectors || stats.discard_ticks)
610 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
611 stats.major,
612 stats.minor,
613 stats.dev_name,
614 stats.read,
615 stats.read_merged,
616 stats.read_sectors,
617 stats.read_ticks,
618 stats.write,
619 stats.write_merged,
620 stats.write_sectors,
621 stats.write_ticks,
622 stats.ios_pgr,
623 stats.total_ticks,
624 stats.rq_ticks,
625 stats.discard_merged,
626 stats.discard_sectors,
627 stats.discard_ticks);
628 else
629 continue;
630
631 l = snprintf(cache, cache_size, "%s", lbuf);
632 if (l < 0)
633 return log_error(0, "Failed to write cache");
634 if ((size_t)l >= cache_size)
635 return log_error(0, "Write to cache was truncated");
636
637 cache += l;
638 cache_size -= l;
639 total_len += l;
640 }
641
642 d->cached = 1;
643 d->size = total_len;
644 if (total_len > size)
645 total_len = size;
646 memcpy(buf, d->buf, total_len);
647
648 return total_len;
649 }
650
#ifdef RELOADTEST
/*
 * Test hook: leave a marker file behind so a reload test can tell that this
 * code path ran. Best effort - a failure to create the file is ignored.
 */
static inline void iwashere(void)
{
	/*
	 * mknod(path, mode, dev): the permission bits belong in the mode
	 * argument; dev is unused for regular files.
	 */
	mknod("/tmp/lxcfs-iwashere", S_IFREG | 0644, 0);
}
#endif
657
658 /*
659 * This function retrieves the busy time of a group of tasks by looking at
660 * cpuacct.usage. Unfortunately, this only makes sense when the container has
661 * been given it's own cpuacct cgroup. If not, this function will take the busy
662 * time of all other taks that do not actually belong to the container into
663 * account as well. If someone has a clever solution for this please send a
664 * patch!
665 */
666 static double get_reaper_busy(pid_t task)
667 {
668 __do_free char *cgroup = NULL, *usage_str = NULL;
669 uint64_t usage = 0;
670 pid_t initpid;
671
672 initpid = lookup_initpid_in_store(task);
673 if (initpid <= 0)
674 return 0;
675
676 cgroup = get_pid_cgroup(initpid, "cpuacct");
677 if (!cgroup)
678 return 0;
679 prune_init_slice(cgroup);
680
681 if (!cgroup_ops->get(cgroup_ops, "cpuacct", cgroup, "cpuacct.usage", &usage_str))
682 return 0;
683
684 if (safe_uint64(usage_str, &usage, 10) < 0)
685 lxcfs_error("Failed to convert usage %s", usage_str);
686
687 return ((double)usage / 1000000000);
688 }
689
690 static uint64_t get_reaper_start_time(pid_t pid)
691 {
692 __do_free void *fopen_cache = NULL;
693 __do_fclose FILE *f = NULL;
694 int ret;
695 uint64_t starttime;
696 char path[STRLITERALLEN("/proc/") + LXCFS_NUMSTRLEN64 +
697 STRLITERALLEN("/stat") + 1];
698 pid_t qpid;
699
700 qpid = lookup_initpid_in_store(pid);
701 if (qpid <= 0)
702 return ret_errno(EINVAL);
703
704 ret = snprintf(path, sizeof(path), "/proc/%d/stat", qpid);
705 if (ret < 0 || (size_t)ret >= sizeof(path))
706 return ret_errno(EINVAL);
707
708 f = fopen_cached(path, "re", &fopen_cache);
709 if (!f)
710 return ret_errno(EINVAL);
711
712 /* Note that the *scanf() argument supression requires that length
713 * modifiers such as "l" are omitted. Otherwise some compilers will yell
714 * at us. It's like telling someone you're not married and then asking
715 * if you can bring your wife to the party.
716 */
717 ret = fscanf(f, "%*d " /* (1) pid %d */
718 "%*s " /* (2) comm %s */
719 "%*c " /* (3) state %c */
720 "%*d " /* (4) ppid %d */
721 "%*d " /* (5) pgrp %d */
722 "%*d " /* (6) session %d */
723 "%*d " /* (7) tty_nr %d */
724 "%*d " /* (8) tpgid %d */
725 "%*u " /* (9) flags %u */
726 "%*u " /* (10) minflt %lu */
727 "%*u " /* (11) cminflt %lu */
728 "%*u " /* (12) majflt %lu */
729 "%*u " /* (13) cmajflt %lu */
730 "%*u " /* (14) utime %lu */
731 "%*u " /* (15) stime %lu */
732 "%*d " /* (16) cutime %ld */
733 "%*d " /* (17) cstime %ld */
734 "%*d " /* (18) priority %ld */
735 "%*d " /* (19) nice %ld */
736 "%*d " /* (20) num_threads %ld */
737 "%*d " /* (21) itrealvalue %ld */
738 "%" PRIu64, /* (22) starttime %llu */
739 &starttime);
740 if (ret != 1)
741 return ret_errno(EINVAL);
742
743 return ret_set_errno(starttime, 0);
744 }
745
/*
 * Return the start time of the reaper of @pid in seconds, obtained by
 * dividing the value from get_reaper_start_time() by the number of clock
 * ticks per second (sysconf(_SC_CLK_TCK)).
 *
 * Returns 0 if the start time is reported as 0 or the tick rate cannot be
 * determined.
 */
static double get_reaper_start_time_in_sec(pid_t pid)
{
	uint64_t start_ticks;
	int64_t hz;

	start_ticks = get_reaper_start_time(pid);
	if (start_ticks == 0)
		return log_debug(0, "Failed to retrieve start time of pid %d", pid);

	hz = sysconf(_SC_CLK_TCK);
	if (hz < 0)
		return log_debug(0, "Failed to determine number of clock ticks in a second");

	return (double)start_ticks / (uint64_t)hz;
}
764
765 static double get_reaper_age(pid_t pid)
766 {
767 uint64_t uptime_ms;
768 double procstart, procage;
769
770 /*
771 * We need to substract the time the process has started since system
772 * boot minus the time when the system has started to get the actual
773 * reaper age.
774 */
775 procstart = get_reaper_start_time_in_sec(pid);
776 procage = procstart;
777 if (procstart > 0) {
778 int ret;
779 struct timespec spec;
780
781 ret = clock_gettime(CLOCK_BOOTTIME, &spec);
782 if (ret < 0)
783 return 0;
784
785 uptime_ms = (spec.tv_sec * 1000) + (spec.tv_nsec * 1e-6);
786 procage = (uptime_ms - (procstart * 1000)) / 1000;
787 }
788
789 return procage;
790 }
791
792 /*
793 * We read /proc/uptime and reuse its second field.
794 * For the first field, we use the mtime for the reaper for
795 * the calling pid as returned by getreaperage
796 */
797 static int proc_uptime_read(char *buf, size_t size, off_t offset,
798 struct fuse_file_info *fi)
799 {
800 struct fuse_context *fc = fuse_get_context();
801 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
802 char *cache = d->buf;
803 ssize_t total_len = 0, ret = 0;
804 double busytime, idletime, reaperage;
805
806 #ifdef RELOADTEST
807 iwashere();
808 #endif
809
810 if (offset) {
811 size_t left;
812
813 if (offset > d->size)
814 return -EINVAL;
815
816 if (!d->cached)
817 return 0;
818
819 left = d->size - offset;
820 total_len = left > size ? size : left;
821 memcpy(buf, cache + offset, total_len);
822
823 return total_len;
824 }
825
826 reaperage = get_reaper_age(fc->pid);
827 /*
828 * To understand why this is done, please read the comment to the
829 * get_reaper_busy() function.
830 */
831 idletime = reaperage;
832 busytime = get_reaper_busy(fc->pid);
833 if (reaperage >= busytime)
834 idletime = reaperage - busytime;
835
836 ret = snprintf(d->buf, d->buflen, "%.2lf %.2lf\n", reaperage, idletime);
837 if (ret < 0 || ret >= d->buflen)
838 return read_file_fuse("/proc/uptime", buf, size, d);
839 total_len = ret;
840
841 d->cached = 1;
842 d->size = total_len;
843 if ((size_t)total_len > size)
844 total_len = size;
845 memcpy(buf, d->buf, total_len);
846
847 return total_len;
848 }
849
850 #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
851 static int proc_stat_read(char *buf, size_t size, off_t offset,
852 struct fuse_file_info *fi)
853 {
854 __do_free char *cg = NULL, *cpuset = NULL, *line = NULL;
855 __do_free void *fopen_cache = NULL;
856 __do_free struct cpuacct_usage *cg_cpu_usage = NULL;
857 __do_fclose FILE *f = NULL;
858 struct fuse_context *fc = fuse_get_context();
859 struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data;
860 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
861 size_t linelen = 0, total_len = 0;
862 int curcpu = -1; /* cpu numbering starts at 0 */
863 int physcpu = 0;
864 uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0,
865 softirq = 0, steal = 0, guest = 0, guest_nice = 0;
866 uint64_t user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0,
867 iowait_sum = 0, irq_sum = 0, softirq_sum = 0, steal_sum = 0,
868 guest_sum = 0, guest_nice_sum = 0;
869 char cpuall[CPUALL_MAX_SIZE];
870 /* reserve for cpu all */
871 char *cache = d->buf + CPUALL_MAX_SIZE;
872 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
873 int cg_cpu_usage_size = 0;
874
875 if (offset) {
876 size_t left;
877
878 if (offset > d->size)
879 return -EINVAL;
880
881 if (!d->cached)
882 return 0;
883
884 left = d->size - offset;
885 total_len = left > size ? size : left;
886 memcpy(buf, d->buf + offset, total_len);
887
888 return total_len;
889 }
890
891 pid_t initpid = lookup_initpid_in_store(fc->pid);
892 if (initpid <= 1 || is_shared_pidns(initpid))
893 initpid = fc->pid;
894
895 /*
896 * when container run with host pid namespace initpid == 1, cgroup will "/"
897 * we should return host os's /proc contents.
898 * in some case cpuacct_usage.all in "/" will larger then /proc/stat
899 */
900 if (initpid == 1)
901 return read_file_fuse("/proc/stat", buf, size, d);
902
903 cg = get_pid_cgroup(initpid, "cpuset");
904 if (!cg)
905 return read_file_fuse("/proc/stat", buf, size, d);
906 prune_init_slice(cg);
907
908 cpuset = get_cpuset(cg);
909 if (!cpuset)
910 return 0;
911
912 f = fopen_cached("/proc/stat", "re", &fopen_cache);
913 if (!f)
914 return 0;
915
916 /* Skip first system cpu line. */
917 if (getline(&line, &linelen, f) < 0)
918 return log_error(0, "proc_stat_read read first line failed");
919
920 /*
921 * Read cpuacct.usage_all for all CPUs.
922 * If the cpuacct cgroup is present, it is used to calculate the container's
923 * CPU usage. If not, values from the host's /proc/stat are used.
924 */
925 if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage, &cg_cpu_usage_size) == 0) {
926 if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) {
927 total_len = cpuview_proc_stat(cg, cpuset, cg_cpu_usage,
928 cg_cpu_usage_size, f,
929 d->buf, d->buflen);
930 goto out;
931 }
932 } else {
933 lxcfs_v("proc_stat_read failed to read from cpuacct, falling back to the host's /proc/stat");
934 }
935
936 while (getline(&line, &linelen, f) != -1) {
937 ssize_t l;
938 char cpu_char[10]; /* That's a lot of cores */
939 char *c;
940 uint64_t all_used, cg_used, new_idle;
941 int ret;
942
943 if (strlen(line) == 0)
944 continue;
945 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
946 /* not a ^cpuN line containing a number N, just print it */
947 l = snprintf(cache, cache_size, "%s", line);
948 if (l < 0)
949 return log_error(0, "Failed to write cache");
950 if ((size_t)l >= cache_size)
951 return log_error(0, "Write to cache was truncated");
952
953 cache += l;
954 cache_size -= l;
955 total_len += l;
956
957 continue;
958 }
959
960 if (sscanf(cpu_char, "%d", &physcpu) != 1)
961 continue;
962
963 if (!cpu_in_cpuset(physcpu, cpuset))
964 continue;
965
966 curcpu++;
967
968 ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
969 &user,
970 &nice,
971 &system,
972 &idle,
973 &iowait,
974 &irq,
975 &softirq,
976 &steal,
977 &guest,
978 &guest_nice);
979 if (ret != 10 || !cg_cpu_usage) {
980 c = strchr(line, ' ');
981 if (!c)
982 continue;
983
984 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
985 if (l < 0)
986 return log_error(0, "Failed to write cache");
987 if ((size_t)l >= cache_size)
988 return log_error(0, "Write to cache was truncated");
989
990 cache += l;
991 cache_size -= l;
992 total_len += l;
993
994 if (ret != 10)
995 continue;
996 }
997
998 if (cg_cpu_usage) {
999 if (physcpu >= cg_cpu_usage_size)
1000 break;
1001
1002 all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
1003 cg_used = cg_cpu_usage[physcpu].user + cg_cpu_usage[physcpu].system;
1004
1005 if (all_used >= cg_used) {
1006 new_idle = idle + (all_used - cg_used);
1007 } else {
1008 lxcfs_debug("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time",
1009 curcpu, cg, all_used, cg_used);
1010 new_idle = idle;
1011 }
1012
1013 l = snprintf(cache, cache_size,
1014 "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
1015 curcpu, cg_cpu_usage[physcpu].user,
1016 cg_cpu_usage[physcpu].system, new_idle);
1017 if (l < 0)
1018 return log_error(0, "Failed to write cache");
1019 if ((size_t)l >= cache_size)
1020 return log_error(0, "Write to cache was truncated");
1021
1022 cache += l;
1023 cache_size -= l;
1024 total_len += l;
1025
1026 user_sum += cg_cpu_usage[physcpu].user;
1027 system_sum += cg_cpu_usage[physcpu].system;
1028 idle_sum += new_idle;
1029 } else {
1030 user_sum += user;
1031 nice_sum += nice;
1032 system_sum += system;
1033 idle_sum += idle;
1034 iowait_sum += iowait;
1035 irq_sum += irq;
1036 softirq_sum += softirq;
1037 steal_sum += steal;
1038 guest_sum += guest;
1039 guest_nice_sum += guest_nice;
1040 }
1041 }
1042
1043 cache = d->buf;
1044
1045 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
1046 user_sum,
1047 nice_sum,
1048 system_sum,
1049 idle_sum,
1050 iowait_sum,
1051 irq_sum,
1052 softirq_sum,
1053 steal_sum,
1054 guest_sum,
1055 guest_nice_sum);
1056 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE) {
1057 memcpy(cache, cpuall, cpuall_len);
1058 cache += cpuall_len;
1059 } else {
1060 /* shouldn't happen */
1061 lxcfs_error("proc_stat_read copy cpuall failed, cpuall_len=%d", cpuall_len);
1062 cpuall_len = 0;
1063 }
1064
1065 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
1066 total_len += cpuall_len;
1067
1068 out:
1069 d->cached = 1;
1070 d->size = total_len;
1071 if (total_len > size)
1072 total_len = size;
1073
1074 memcpy(buf, d->buf, total_len);
1075 return total_len;
1076 }
1077
1078 /* Note that "memory.stat" in cgroup2 is hierarchical by default. */
1079 static bool cgroup_parse_memory_stat(const char *cgroup, struct memory_stat *mstat)
1080 {
1081 __do_close int fd = -EBADF;
1082 __do_fclose FILE *f = NULL;
1083 __do_free char *line = NULL;
1084 __do_free void *fdopen_cache = NULL;
1085 bool unified;
1086 size_t len = 0;
1087 ssize_t linelen;
1088
1089 fd = cgroup_ops->get_memory_stats_fd(cgroup_ops, cgroup);
1090 if (fd < 0)
1091 return false;
1092
1093 f = fdopen_cached(fd, "re", &fdopen_cache);
1094 if (!f)
1095 return false;
1096
1097 unified = pure_unified_layout(cgroup_ops);
1098 while ((linelen = getline(&line, &len, f)) != -1) {
1099 if (!unified && startswith(line, "hierarchical_memory_limit")) {
1100 sscanf(line, "hierarchical_memory_limit %" PRIu64, &(mstat->hierarchical_memory_limit));
1101 } else if (!unified && startswith(line, "hierarchical_memsw_limit")) {
1102 sscanf(line, "hierarchical_memsw_limit %" PRIu64, &(mstat->hierarchical_memsw_limit));
1103 } else if (startswith(line, unified ? "file" :"total_cache")) {
1104 sscanf(line, unified ? "file %" PRIu64 : "total_cache %" PRIu64, &(mstat->total_cache));
1105 } else if (!unified && startswith(line, "total_rss")) {
1106 sscanf(line, "total_rss %" PRIu64, &(mstat->total_rss));
1107 } else if (!unified && startswith(line, "total_rss_huge")) {
1108 sscanf(line, "total_rss_huge %" PRIu64, &(mstat->total_rss_huge));
1109 } else if (startswith(line, unified ? "shmem" : "total_shmem")) {
1110 sscanf(line, unified ? "shmem %" PRIu64 : "total_shmem %" PRIu64, &(mstat->total_shmem));
1111 } else if (startswith(line, unified ? "file_mapped" : "total_mapped_file")) {
1112 sscanf(line, unified ? "file_mapped %" PRIu64 : "total_mapped_file %" PRIu64, &(mstat->total_mapped_file));
1113 } else if (!unified && startswith(line, "total_dirty")) {
1114 sscanf(line, "total_dirty %" PRIu64, &(mstat->total_dirty));
1115 } else if (!unified && startswith(line, "total_writeback")) {
1116 sscanf(line, "total_writeback %" PRIu64, &(mstat->total_writeback));
1117 } else if (!unified && startswith(line, "total_swap")) {
1118 sscanf(line, "total_swap %" PRIu64, &(mstat->total_swap));
1119 } else if (!unified && startswith(line, "total_pgpgin")) {
1120 sscanf(line, "total_pgpgin %" PRIu64, &(mstat->total_pgpgin));
1121 } else if (!unified && startswith(line, "total_pgpgout")) {
1122 sscanf(line, "total_pgpgout %" PRIu64, &(mstat->total_pgpgout));
1123 } else if (startswith(line, unified ? "pgfault" : "total_pgfault")) {
1124 sscanf(line, unified ? "pgfault %" PRIu64 : "total_pgfault %" PRIu64, &(mstat->total_pgfault));
1125 } else if (startswith(line, unified ? "pgmajfault" : "total_pgmajfault")) {
1126 sscanf(line, unified ? "pgmajfault %" PRIu64 : "total_pgmajfault %" PRIu64, &(mstat->total_pgmajfault));
1127 } else if (startswith(line, unified ? "inactive_anon" : "total_inactive_anon")) {
1128 sscanf(line, unified ? "inactive_anon %" PRIu64 : "total_inactive_anon %" PRIu64, &(mstat->total_inactive_anon));
1129 } else if (startswith(line, unified ? "active_anon" : "total_active_anon")) {
1130 sscanf(line, unified ? "active_anon %" PRIu64 : "total_active_anon %" PRIu64, &(mstat->total_active_anon));
1131 } else if (startswith(line, unified ? "inactive_file" : "total_inactive_file")) {
1132 sscanf(line, unified ? "inactive_file %" PRIu64 : "total_inactive_file %" PRIu64, &(mstat->total_inactive_file));
1133 } else if (startswith(line, unified ? "active_file" : "total_active_file")) {
1134 sscanf(line, unified ? "active_file %" PRIu64 : "total_active_file %" PRIu64, &(mstat->total_active_file));
1135 } else if (startswith(line, unified ? "unevictable" : "total_unevictable")) {
1136 sscanf(line, unified ? "unevictable %" PRIu64 : "total_unevictable %" PRIu64, &(mstat->total_unevictable));
1137 }
1138 }
1139
1140 return true;
1141 }
1142
1143 static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1144 struct fuse_file_info *fi)
1145 {
1146 __do_free char *cgroup = NULL, *line = NULL, *memusage_str = NULL,
1147 *memswusage_str = NULL, *memswpriority_str = NULL;
1148 __do_free void *fopen_cache = NULL;
1149 __do_fclose FILE *f = NULL;
1150 struct fuse_context *fc = fuse_get_context();
1151 bool wants_swap = lxcfs_has_opt(fuse_get_context()->private_data, LXCFS_SWAP_ON);
1152 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1153 uint64_t memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0,
1154 hosttotal = 0, swfree = 0, swusage = 0, swtotal = 0,
1155 memswpriority = 1;
1156 struct memory_stat mstat = {};
1157 size_t linelen = 0, total_len = 0;
1158 char *cache = d->buf;
1159 size_t cache_size = d->buflen;
1160 int ret;
1161
1162 if (offset) {
1163 size_t left;
1164
1165 if (offset > d->size)
1166 return -EINVAL;
1167
1168 if (!d->cached)
1169 return 0;
1170
1171 left = d->size - offset;
1172 total_len = left > size ? size : left;
1173 memcpy(buf, cache + offset, total_len);
1174
1175 return total_len;
1176 }
1177
1178 pid_t initpid = lookup_initpid_in_store(fc->pid);
1179 if (initpid <= 1 || is_shared_pidns(initpid))
1180 initpid = fc->pid;
1181
1182 cgroup = get_pid_cgroup(initpid, "memory");
1183 if (!cgroup)
1184 return read_file_fuse("/proc/meminfo", buf, size, d);
1185
1186 prune_init_slice(cgroup);
1187
1188 /* memory limits */
1189 ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str);
1190 if (ret < 0)
1191 return read_file_fuse("/proc/meminfo", buf, size, d);
1192
1193 if (safe_uint64(memusage_str, &memusage, 10) < 0)
1194 lxcfs_error("Failed to convert memusage %s", memusage_str);
1195
1196 if (!cgroup_parse_memory_stat(cgroup, &mstat))
1197 return read_file_fuse("/proc/meminfo", buf, size, d);
1198
1199 memlimit = get_min_memlimit(cgroup, false);
1200
1201 /*
1202 * Following values are allowed to fail, because swapaccount might be
1203 * turned off for current kernel.
1204 */
1205 if (wants_swap) {
1206 memswlimit = get_min_memlimit(cgroup, true);
1207 if (memswlimit > 0) {
1208 ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str);
1209 if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) {
1210 if (memlimit > memswlimit)
1211 swtotal = 0;
1212 else
1213 swtotal = (memswlimit - memlimit) / 1024;
1214 if (memusage > memswusage || swtotal == 0)
1215 swusage = 0;
1216 else
1217 swusage = (memswusage - memusage) / 1024;
1218 }
1219 }
1220
1221 ret = cgroup_ops->get_memory_swappiness(cgroup_ops, cgroup, &memswpriority_str);
1222 if (ret >= 0)
1223 safe_uint64(memswpriority_str, &memswpriority, 10);
1224 }
1225
1226 f = fopen_cached("/proc/meminfo", "re", &fopen_cache);
1227 if (!f)
1228 return read_file_fuse("/proc/meminfo", buf, size, d);
1229
1230 memusage /= 1024;
1231 memlimit /= 1024;
1232 while (getline(&line, &linelen, f) != -1) {
1233 ssize_t l;
1234 char *printme, lbuf[100];
1235
1236 memset(lbuf, 0, 100);
1237 if (startswith(line, "MemTotal:")) {
1238 sscanf(line+sizeof("MemTotal:")-1, "%" PRIu64, &hosttotal);
1239 if (memlimit == 0)
1240 memlimit = hosttotal;
1241
1242 if (hosttotal < memlimit)
1243 memlimit = hosttotal;
1244 snprintf(lbuf, 100, "MemTotal: %8" PRIu64 " kB\n", memlimit);
1245 printme = lbuf;
1246 } else if (startswith(line, "MemFree:")) {
1247 snprintf(lbuf, 100, "MemFree: %8" PRIu64 " kB\n", memlimit - memusage);
1248 printme = lbuf;
1249 } else if (startswith(line, "MemAvailable:")) {
1250 snprintf(lbuf, 100, "MemAvailable: %8" PRIu64 " kB\n", memlimit - memusage + mstat.total_cache / 1024);
1251 printme = lbuf;
1252 } else if (startswith(line, "SwapTotal:")) {
1253 if (wants_swap) {
1254 uint64_t hostswtotal = 0;
1255
1256 sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal);
1257
1258 /* The total amount of swap is always reported to be the
1259 lesser of the RAM+SWAP limit or the SWAP device size.
1260 This is because the kernel can swap as much as it
1261 wants and not only up to swtotal. */
1262
1263 swtotal = memlimit + swtotal;
1264 if (hostswtotal < swtotal) {
1265 swtotal = hostswtotal;
1266 }
1267
1268 /* When swappiness is 0, pretend we can't swap. */
1269 if (memswpriority == 0) {
1270 swtotal = swusage;
1271 }
1272 }
1273
1274 snprintf(lbuf, 100, "SwapTotal: %8" PRIu64 " kB\n", swtotal);
1275 printme = lbuf;
1276 } else if (startswith(line, "SwapFree:")) {
1277 if (wants_swap) {
1278 swfree = swtotal - swusage;
1279 }
1280
1281 snprintf(lbuf, 100, "SwapFree: %8" PRIu64 " kB\n", swfree);
1282 printme = lbuf;
1283 } else if (startswith(line, "Slab:")) {
1284 snprintf(lbuf, 100, "Slab: %8" PRIu64 " kB\n", (uint64_t)0);
1285 printme = lbuf;
1286 } else if (startswith(line, "Buffers:")) {
1287 snprintf(lbuf, 100, "Buffers: %8" PRIu64 " kB\n", (uint64_t)0);
1288 printme = lbuf;
1289 } else if (startswith(line, "Cached:")) {
1290 snprintf(lbuf, 100, "Cached: %8" PRIu64 " kB\n",
1291 mstat.total_cache / 1024);
1292 printme = lbuf;
1293 } else if (startswith(line, "SwapCached:")) {
1294 snprintf(lbuf, 100, "SwapCached: %8" PRIu64 " kB\n", (uint64_t)0);
1295 printme = lbuf;
1296 } else if (startswith(line, "Active:")) {
1297 snprintf(lbuf, 100, "Active: %8" PRIu64 " kB\n",
1298 (mstat.total_active_anon +
1299 mstat.total_active_file) /
1300 1024);
1301 printme = lbuf;
1302 } else if (startswith(line, "Inactive:")) {
1303 snprintf(lbuf, 100, "Inactive: %8" PRIu64 " kB\n",
1304 (mstat.total_inactive_anon +
1305 mstat.total_inactive_file) /
1306 1024);
1307 printme = lbuf;
1308 } else if (startswith(line, "Active(anon):")) {
1309 snprintf(lbuf, 100, "Active(anon): %8" PRIu64 " kB\n",
1310 mstat.total_active_anon / 1024);
1311 printme = lbuf;
1312 } else if (startswith(line, "Inactive(anon):")) {
1313 snprintf(lbuf, 100, "Inactive(anon): %8" PRIu64 " kB\n",
1314 mstat.total_inactive_anon / 1024);
1315 printme = lbuf;
1316 } else if (startswith(line, "Active(file):")) {
1317 snprintf(lbuf, 100, "Active(file): %8" PRIu64 " kB\n",
1318 mstat.total_active_file / 1024);
1319 printme = lbuf;
1320 } else if (startswith(line, "Inactive(file):")) {
1321 snprintf(lbuf, 100, "Inactive(file): %8" PRIu64 " kB\n",
1322 mstat.total_inactive_file / 1024);
1323 printme = lbuf;
1324 } else if (startswith(line, "Unevictable:")) {
1325 snprintf(lbuf, 100, "Unevictable: %8" PRIu64 " kB\n",
1326 mstat.total_unevictable / 1024);
1327 printme = lbuf;
1328 } else if (startswith(line, "Dirty:")) {
1329 snprintf(lbuf, 100, "Dirty: %8" PRIu64 " kB\n",
1330 mstat.total_dirty / 1024);
1331 printme = lbuf;
1332 } else if (startswith(line, "Writeback:")) {
1333 snprintf(lbuf, 100, "Writeback: %8" PRIu64 " kB\n",
1334 mstat.total_writeback / 1024);
1335 printme = lbuf;
1336 } else if (startswith(line, "AnonPages:")) {
1337 snprintf(lbuf, 100, "AnonPages: %8" PRIu64 " kB\n",
1338 (mstat.total_active_anon +
1339 mstat.total_inactive_anon - mstat.total_shmem) /
1340 1024);
1341 printme = lbuf;
1342 } else if (startswith(line, "Mapped:")) {
1343 snprintf(lbuf, 100, "Mapped: %8" PRIu64 " kB\n",
1344 mstat.total_mapped_file / 1024);
1345 printme = lbuf;
1346 } else if (startswith(line, "SReclaimable:")) {
1347 snprintf(lbuf, 100, "SReclaimable: %8" PRIu64 " kB\n", (uint64_t)0);
1348 printme = lbuf;
1349 } else if (startswith(line, "SUnreclaim:")) {
1350 snprintf(lbuf, 100, "SUnreclaim: %8" PRIu64 " kB\n", (uint64_t)0);
1351 printme = lbuf;
1352 } else if (startswith(line, "Shmem:")) {
1353 snprintf(lbuf, 100, "Shmem: %8" PRIu64 " kB\n",
1354 mstat.total_shmem / 1024);
1355 printme = lbuf;
1356 } else if (startswith(line, "ShmemHugePages:")) {
1357 snprintf(lbuf, 100, "ShmemHugePages: %8" PRIu64 " kB\n", (uint64_t)0);
1358 printme = lbuf;
1359 } else if (startswith(line, "ShmemPmdMapped:")) {
1360 snprintf(lbuf, 100, "ShmemPmdMapped: %8" PRIu64 " kB\n", (uint64_t)0);
1361 printme = lbuf;
1362 } else if (startswith(line, "AnonHugePages:")) {
1363 snprintf(lbuf, 100, "AnonHugePages: %8" PRIu64 " kB\n",
1364 mstat.total_rss_huge / 1024);
1365 printme = lbuf;
1366 } else {
1367 printme = line;
1368 }
1369
1370 l = snprintf(cache, cache_size, "%s", printme);
1371 if (l < 0)
1372 return log_error(0, "Failed to write cache");
1373 if ((size_t)l >= cache_size)
1374 return log_error(0, "Write to cache was truncated");
1375
1376 cache += l;
1377 cache_size -= l;
1378 total_len += l;
1379 }
1380
1381 d->cached = 1;
1382 d->size = total_len;
1383 if (total_len > size)
1384 total_len = size;
1385 memcpy(buf, d->buf, total_len);
1386
1387 return total_len;
1388 }
1389
1390 static int proc_slabinfo_read(char *buf, size_t size, off_t offset,
1391 struct fuse_file_info *fi)
1392 {
1393 __do_free char *cgroup = NULL, *line = NULL;
1394 __do_free void *fopen_cache = NULL;
1395 __do_fclose FILE *f = NULL;
1396 __do_close int fd = -EBADF;
1397 struct fuse_context *fc = fuse_get_context();
1398 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1399 size_t linelen = 0, total_len = 0;
1400 char *cache = d->buf;
1401 size_t cache_size = d->buflen;
1402 pid_t initpid;
1403
1404 if (offset) {
1405 size_t left;
1406
1407 if (offset > d->size)
1408 return -EINVAL;
1409
1410 if (!d->cached)
1411 return 0;
1412
1413 left = d->size - offset;
1414 total_len = left > size ? size : left;
1415 memcpy(buf, cache + offset, total_len);
1416
1417 return total_len;
1418 }
1419
1420 initpid = lookup_initpid_in_store(fc->pid);
1421 if (initpid <= 1 || is_shared_pidns(initpid))
1422 initpid = fc->pid;
1423
1424 cgroup = get_pid_cgroup(initpid, "memory");
1425 if (!cgroup)
1426 return read_file_fuse("/proc/slabinfo", buf, size, d);
1427
1428 prune_init_slice(cgroup);
1429
1430 fd = cgroup_ops->get_memory_slabinfo_fd(cgroup_ops, cgroup);
1431 if (fd < 0)
1432 return read_file_fuse("/proc/slabinfo", buf, size, d);
1433
1434 f = fdopen_cached(fd, "re", &fopen_cache);
1435 if (!f)
1436 return read_file_fuse("/proc/slabinfo", buf, size, d);
1437
1438 while (getline(&line, &linelen, f) != -1) {
1439 ssize_t l = snprintf(cache, cache_size, "%s", line);
1440 if (l < 0)
1441 return log_error(0, "Failed to write cache");
1442 if ((size_t)l >= cache_size)
1443 return log_error(0, "Write to cache was truncated");
1444
1445 cache += l;
1446 cache_size -= l;
1447 total_len += l;
1448 }
1449
1450 d->cached = 1;
1451 d->size = total_len;
1452 if (total_len > size)
1453 total_len = size;
1454 memcpy(buf, d->buf, total_len);
1455
1456 return total_len;
1457 }
1458
1459 __lxcfs_fuse_ops int proc_read(const char *path, char *buf, size_t size,
1460 off_t offset, struct fuse_file_info *fi)
1461 {
1462 struct file_info *f = INTTYPE_TO_PTR(fi->fh);
1463
1464 switch (f->type) {
1465 case LXC_TYPE_PROC_MEMINFO:
1466 if (liblxcfs_functional())
1467 return proc_meminfo_read(buf, size, offset, fi);
1468
1469 return read_file_fuse_with_offset(LXC_TYPE_PROC_MEMINFO_PATH,
1470 buf, size, offset, f);
1471 case LXC_TYPE_PROC_CPUINFO:
1472 if (liblxcfs_functional())
1473 return proc_cpuinfo_read(buf, size, offset, fi);
1474
1475 return read_file_fuse_with_offset(LXC_TYPE_PROC_CPUINFO_PATH,
1476 buf, size, offset, f);
1477 case LXC_TYPE_PROC_UPTIME:
1478 if (liblxcfs_functional())
1479 return proc_uptime_read(buf, size, offset, fi);
1480
1481 return read_file_fuse_with_offset(LXC_TYPE_PROC_UPTIME_PATH,
1482 buf, size, offset, f);
1483 case LXC_TYPE_PROC_STAT:
1484 if (liblxcfs_functional())
1485 return proc_stat_read(buf, size, offset, fi);
1486
1487 return read_file_fuse_with_offset(LXC_TYPE_PROC_STAT_PATH, buf,
1488 size, offset, f);
1489 case LXC_TYPE_PROC_DISKSTATS:
1490 if (liblxcfs_functional())
1491 return proc_diskstats_read(buf, size, offset, fi);
1492
1493 return read_file_fuse_with_offset(LXC_TYPE_PROC_DISKSTATS_PATH,
1494 buf, size, offset, f);
1495 case LXC_TYPE_PROC_SWAPS:
1496 if (liblxcfs_functional())
1497 return proc_swaps_read(buf, size, offset, fi);
1498
1499 return read_file_fuse_with_offset(LXC_TYPE_PROC_SWAPS_PATH, buf,
1500 size, offset, f);
1501 case LXC_TYPE_PROC_LOADAVG:
1502 if (liblxcfs_functional())
1503 return proc_loadavg_read(buf, size, offset, fi);
1504
1505 return read_file_fuse_with_offset(LXC_TYPE_PROC_LOADAVG_PATH,
1506 buf, size, offset, f);
1507 case LXC_TYPE_PROC_SLABINFO:
1508 if (liblxcfs_functional())
1509 return proc_slabinfo_read(buf, size, offset, fi);
1510
1511 return read_file_fuse_with_offset(LXC_TYPE_PROC_SLABINFO_PATH,
1512 buf, size, offset, f);
1513 }
1514
1515 return -EINVAL;
1516 }