]> git.proxmox.com Git - mirror_lxcfs.git/blob - src/proc_fuse.c
Merge pull request #440 from loyou/master
[mirror_lxcfs.git] / src / proc_fuse.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6
7 #include "config.h"
8
9 #ifdef HAVE_FUSE3
10 #ifndef FUSE_USE_VERSION
11 #define FUSE_USE_VERSION 30
12 #endif
13 #else
14 #ifndef FUSE_USE_VERSION
15 #define FUSE_USE_VERSION 26
16 #endif
17 #endif
18
19 #define _FILE_OFFSET_BITS 64
20
21 #define __STDC_FORMAT_MACROS
22 #include <dirent.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <fuse.h>
26 #include <inttypes.h>
27 #include <libgen.h>
28 #include <pthread.h>
29 #include <sched.h>
30 #include <stdarg.h>
31 #include <stdbool.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <time.h>
37 #include <unistd.h>
38 #include <wait.h>
39 #include <linux/magic.h>
40 #include <linux/sched.h>
41 #include <sys/epoll.h>
42 #include <sys/mman.h>
43 #include <sys/mount.h>
44 #include <sys/param.h>
45 #include <sys/socket.h>
46 #include <sys/syscall.h>
47 #include <sys/sysinfo.h>
48 #include <sys/vfs.h>
49
50 #include "bindings.h"
51 #include "cgroup_fuse.h"
52 #include "cgroups/cgroup.h"
53 #include "cgroups/cgroup_utils.h"
54 #include "cpuset_parse.h"
55 #include "lxcfs_fuse_compat.h"
56 #include "memory_utils.h"
57 #include "proc_loadavg.h"
58 #include "proc_cpuview.h"
59 #include "utils.h"
60
/*
 * Counters taken from a cgroup's memory statistics file.
 *
 * Field names follow the legacy (non-unified) key names; on a unified
 * hierarchy the parser maps the corresponding keys onto the same fields.
 * All members are 64-bit unsigned and the order below is the storage order.
 */
struct memory_stat {
	/* Limits */
	uint64_t hierarchical_memory_limit;
	uint64_t hierarchical_memsw_limit;

	/* Usage by kind of memory */
	uint64_t total_cache;
	uint64_t total_rss;
	uint64_t total_rss_huge;
	uint64_t total_shmem;
	uint64_t total_mapped_file;
	uint64_t total_dirty;
	uint64_t total_writeback;
	uint64_t total_swap;

	/* Paging activity */
	uint64_t total_pgpgin;
	uint64_t total_pgpgout;
	uint64_t total_pgfault;
	uint64_t total_pgmajfault;

	/* LRU lists */
	uint64_t total_inactive_anon;
	uint64_t total_active_anon;
	uint64_t total_inactive_file;
	uint64_t total_active_file;
	uint64_t total_unevictable;
};
82
83 __lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb)
84 {
85 struct timespec now;
86
87 memset(sb, 0, sizeof(struct stat));
88 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
89 return -EINVAL;
90
91 sb->st_uid = sb->st_gid = 0;
92 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
93 if (strcmp(path, "/proc") == 0) {
94 sb->st_mode = S_IFDIR | 00555;
95 sb->st_nlink = 2;
96 return 0;
97 }
98
99 if (strcmp(path, "/proc/meminfo") == 0 ||
100 strcmp(path, "/proc/cpuinfo") == 0 ||
101 strcmp(path, "/proc/uptime") == 0 ||
102 strcmp(path, "/proc/stat") == 0 ||
103 strcmp(path, "/proc/diskstats") == 0 ||
104 strcmp(path, "/proc/swaps") == 0 ||
105 strcmp(path, "/proc/loadavg") == 0 ||
106 strcmp(path, "/proc/slabinfo") == 0) {
107 sb->st_size = 4096;
108 sb->st_mode = S_IFREG | 00444;
109 sb->st_nlink = 1;
110 return 0;
111 }
112
113 return -ENOENT;
114 }
115
116 __lxcfs_fuse_ops int proc_readdir(const char *path, void *buf,
117 fuse_fill_dir_t filler, off_t offset,
118 struct fuse_file_info *fi)
119 {
120 if (DIR_FILLER(filler, buf, ".", NULL, 0) != 0 ||
121 DIR_FILLER(filler, buf, "..", NULL, 0) != 0 ||
122 DIR_FILLER(filler, buf, "cpuinfo", NULL, 0) != 0 ||
123 DIR_FILLER(filler, buf, "meminfo", NULL, 0) != 0 ||
124 DIR_FILLER(filler, buf, "stat", NULL, 0) != 0 ||
125 DIR_FILLER(filler, buf, "uptime", NULL, 0) != 0 ||
126 DIR_FILLER(filler, buf, "diskstats", NULL, 0) != 0 ||
127 DIR_FILLER(filler, buf, "swaps", NULL, 0) != 0 ||
128 DIR_FILLER(filler, buf, "loadavg", NULL, 0) != 0 ||
129 DIR_FILLER(filler, buf, "slabinfo", NULL, 0) != 0)
130 return -EINVAL;
131
132 return 0;
133 }
134
135 static off_t get_procfile_size(const char *path)
136 {
137 __do_fclose FILE *f = NULL;
138 __do_free char *line = NULL;
139 size_t len = 0;
140 ssize_t sz, answer = 0;
141
142 f = fopen(path, "re");
143 if (!f)
144 return 0;
145
146 while ((sz = getline(&line, &len, f)) != -1)
147 answer += sz;
148
149 return answer;
150 }
151
152 __lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi)
153 {
154 __do_free struct file_info *info = NULL;
155 int type = -1;
156
157 if (strcmp(path, "/proc/meminfo") == 0)
158 type = LXC_TYPE_PROC_MEMINFO;
159 else if (strcmp(path, "/proc/cpuinfo") == 0)
160 type = LXC_TYPE_PROC_CPUINFO;
161 else if (strcmp(path, "/proc/uptime") == 0)
162 type = LXC_TYPE_PROC_UPTIME;
163 else if (strcmp(path, "/proc/stat") == 0)
164 type = LXC_TYPE_PROC_STAT;
165 else if (strcmp(path, "/proc/diskstats") == 0)
166 type = LXC_TYPE_PROC_DISKSTATS;
167 else if (strcmp(path, "/proc/swaps") == 0)
168 type = LXC_TYPE_PROC_SWAPS;
169 else if (strcmp(path, "/proc/loadavg") == 0)
170 type = LXC_TYPE_PROC_LOADAVG;
171 else if (strcmp(path, "/proc/slabinfo") == 0)
172 type = LXC_TYPE_PROC_SLABINFO;
173 if (type == -1)
174 return -ENOENT;
175
176 info = zalloc(sizeof(*info));
177 if (!info)
178 return -ENOMEM;
179
180 info->type = type;
181
182 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
183
184 info->buf = zalloc(info->buflen);
185 if (!info->buf)
186 return -ENOMEM;
187 /* set actual size to buffer size */
188 info->size = info->buflen;
189
190 fi->fh = PTR_TO_UINT64(move_ptr(info));
191 return 0;
192 }
193
194 __lxcfs_fuse_ops int proc_access(const char *path, int mask)
195 {
196 if (strcmp(path, "/proc") == 0 && access(path, R_OK) == 0)
197 return 0;
198
199 /* these are all read-only */
200 if ((mask & ~R_OK) != 0)
201 return -EACCES;
202
203 return 0;
204 }
205
206 __lxcfs_fuse_ops int proc_release(const char *path, struct fuse_file_info *fi)
207 {
208 do_release_file_info(fi);
209 return 0;
210 }
211
212 static uint64_t get_memlimit(const char *cgroup, bool swap)
213 {
214 __do_free char *memlimit_str = NULL;
215 uint64_t memlimit = 0;
216 int ret;
217
218 if (swap)
219 ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cgroup, &memlimit_str);
220 else
221 ret = cgroup_ops->get_memory_max(cgroup_ops, cgroup, &memlimit_str);
222 if (ret > 0 && memlimit_str[0] && safe_uint64(memlimit_str, &memlimit, 10) < 0)
223 lxcfs_error("Failed to convert memlimit %s", memlimit_str);
224
225 return memlimit;
226 }
227
/*
 * GNU-flavoured dirname(), modelled on the glibc-2.32 implementation.
 *
 * POSIX dirname("/some-dir") may return "/some-dir" rather than "/", which
 * breaks get_min_memlimit(); this variant always strips the last component.
 *
 * @path is modified in place (a NUL is written at the cut position) and
 * returned. When @path is NULL or has no directory part, a pointer to a
 * static, read-only "." is returned instead and @path is left untouched.
 */
static char *gnu_dirname(char *path)
{
	static const char dot[] = ".";
	char *cut;
	char *p;

	cut = path ? strrchr(path, '/') : NULL;

	/*
	 * The path ends in '/' (and is more than a lone leading slash): skip
	 * the run of trailing slashes and look for the separator in front of
	 * the last real component.
	 */
	if (cut && cut != path && cut[1] == '\0') {
		p = cut;
		while (p != path && p[-1] == '/')
			p--;

		if (p != path) {
			/* Backward search for '/' within [path, p). */
			cut = NULL;
			for (char *q = p; q != path; q--) {
				if (q[-1] == '/') {
					cut = q - 1;
					break;
				}
			}
		}
	}

	/* No directory part at all. */
	if (!cut)
		return (char *)dot;

	/* Collapse the slashes that directly precede the cut position. */
	p = cut;
	while (p != path && p[-1] == '/')
		p--;

	if (p != path) {
		cut = p;
	} else if (cut == path + 1) {
		/*
		 * Exactly two leading slashes: "//" has to be preserved
		 * (see XBD 4.10 Path Name Resolution).
		 */
		cut++;
	} else {
		/* The separator is the leading slash: the result is "/". */
		cut = path + 1;
	}

	*cut = '\0';
	return path;
}
289
290 static uint64_t get_min_memlimit(const char *cgroup, bool swap)
291 {
292 __do_free char *copy = NULL;
293 uint64_t memlimit = 0, retlimit = 0;
294
295 copy = strdup(cgroup);
296 if (!copy)
297 return log_error_errno(0, ENOMEM, "Failed to allocate memory");
298
299 retlimit = get_memlimit(copy, swap);
300
301 /*
302 * If the cgroup doesn't start with / (probably won't happen), dirname()
303 * will terminate with "" instead of "/"
304 */
305 while (*copy && strcmp(copy, "/") != 0) {
306 char *it = copy;
307
308 it = gnu_dirname(it);
309 memlimit = get_memlimit(it, swap);
310 if (memlimit > 0 && memlimit < retlimit)
311 retlimit = memlimit;
312 };
313
314 return retlimit;
315 }
316
/* True when @line begins with @pref; an empty @pref matches any line. */
static inline bool startswith(const char *line, const char *pref)
{
	size_t pref_len = strlen(pref);

	return !strncmp(line, pref, pref_len);
}
321
322 static int proc_swaps_read(char *buf, size_t size, off_t offset,
323 struct fuse_file_info *fi)
324 {
325 __do_free char *cgroup = NULL, *memusage_str = NULL,
326 *memswusage_str = NULL, *memswpriority_str = NULL;
327 struct fuse_context *fc = fuse_get_context();
328 struct lxcfs_opts *opts = (struct lxcfs_opts *)fuse_get_context()->private_data;
329 bool wants_swap = opts && !opts->swap_off && liblxcfs_can_use_swap();
330 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
331 uint64_t memswlimit = 0, memlimit = 0, memusage = 0, memswusage = 0,
332 swtotal = 0, swusage = 0, memswpriority = 1,
333 hostswtotal = 0, hostswfree = 0;
334 ssize_t total_len = 0;
335 ssize_t l = 0;
336 char *cache = d->buf;
337 int ret;
338 __do_free char *line = NULL;
339 __do_free void *fopen_cache = NULL;
340 __do_fclose FILE *f = NULL;
341 size_t linelen = 0;
342
343 if (offset) {
344 int left;
345
346 if (offset > d->size)
347 return -EINVAL;
348
349 if (!d->cached)
350 return 0;
351
352 left = d->size - offset;
353 total_len = left > size ? size: left;
354 memcpy(buf, cache + offset, total_len);
355
356 return total_len;
357 }
358
359 pid_t initpid = lookup_initpid_in_store(fc->pid);
360 if (initpid <= 1 || is_shared_pidns(initpid))
361 initpid = fc->pid;
362
363 cgroup = get_pid_cgroup(initpid, "memory");
364 if (!cgroup)
365 return read_file_fuse("/proc/swaps", buf, size, d);
366 prune_init_slice(cgroup);
367
368 memlimit = get_min_memlimit(cgroup, false);
369
370 ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str);
371 if (ret < 0)
372 return 0;
373
374 if (safe_uint64(memusage_str, &memusage, 10) < 0)
375 lxcfs_error("Failed to convert memusage %s", memusage_str);
376
377 if (wants_swap) {
378 memswlimit = get_min_memlimit(cgroup, true);
379 if (memswlimit > 0) {
380 ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str);
381 if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) {
382 if (memlimit > memswlimit)
383 swtotal = 0;
384 else
385 swtotal = (memswlimit - memlimit) / 1024;
386 if (memusage > memswusage || swtotal == 0)
387 swusage = 0;
388 else
389 swusage = (memswusage - memusage) / 1024;
390 }
391
392 ret = cgroup_ops->get_memory_swappiness(cgroup_ops, cgroup, &memswpriority_str);
393 if (ret >= 0)
394 safe_uint64(memswpriority_str, &memswpriority, 10);
395 }
396 }
397
398 total_len = snprintf(d->buf, d->size, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
399
400 /* Read host total and free values */
401 f = fopen_cached("/proc/meminfo", "re", &fopen_cache);
402 if (!f)
403 return 0;
404
405 while (getline(&line, &linelen, f) != -1) {
406 if (startswith(line, "SwapTotal:"))
407 sscanf(line, "SwapTotal: %8" PRIu64 " kB", &hostswtotal);
408 else if (startswith(line, "SwapFree:"))
409 sscanf(line, "SwapFree: %8" PRIu64 " kB", &hostswfree);
410 }
411
412 if (wants_swap) {
413 /* The total amount of swap is always reported to be the
414 lesser of the RAM+SWAP limit or the SWAP device size.
415 This is because the kernel can swap as much as it
416 wants and not only up to swtotal. */
417 swtotal = memlimit / 1024 + swtotal;
418 if (hostswtotal < swtotal) {
419 swtotal = hostswtotal;
420 }
421
422 /* When swappiness is 0, pretend we can't swap. */
423 if (memswpriority == 0) {
424 swtotal = swusage;
425 }
426 }
427
428 if (swtotal > 0) {
429 l = snprintf(d->buf + total_len, d->size - total_len,
430 "none%*svirtual\t\t%" PRIu64 "\t%" PRIu64 "\t0\n",
431 36, " ", swtotal, swusage);
432 total_len += l;
433 }
434
435 if (total_len < 0 || l < 0)
436 return log_error(0, "Failed writing to cache");
437
438 d->cached = 1;
439 d->size = (int)total_len;
440
441 if (total_len > size)
442 total_len = size;
443 memcpy(buf, d->buf, total_len);
444
445 return total_len;
446 }
447
/*
 * Extract one counter from a blkio statistics blob.
 *
 * @str is a newline-separated list of "<major>:<minor> <iotype> <value>"
 * lines. The value of the first line starting with
 * "@major:@minor @iotype" is stored in @v.
 *
 * @v is set to 0 when @str is NULL, when no line matches, or when the matching
 * line carries no number.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor,
			       char *iotype, uint64_t *v)
{
	char key[32];
	size_t len;
	int n;

	*v = 0;

	/* The caller may not have obtained this statistic at all. */
	if (!str)
		return;

	n = snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	if (n < 0 || (size_t)n >= sizeof(key))
		return;
	len = (size_t)n;

	for (const char *p = str; *p;) {
		if (strncmp(p, key, len) == 0) {
			/* SCNu64 matches uint64_t on every platform; "%lu" does not. */
			if (sscanf(p + len, "%" SCNu64, v) != 1)
				*v = 0;
			return;
		}

		p = strchr(p, '\n');
		if (!p)
			return;
		p++;
	}
}
471
/*
 * One row of /proc/diskstats. The number in each comment is the position of
 * the column within the row.
 */
struct lxcfs_diskstats {
	/* Device identification */
	unsigned int major;		/*  1 - major number */
	unsigned int minor;		/*  2 - minor number */
	char dev_name[72];		/*  3 - device name */

	/* Reads */
	uint64_t read;			/*  4 - reads completed successfully */
	uint64_t read_merged;		/*  5 - reads merged */
	uint64_t read_sectors;		/*  6 - sectors read */
	uint64_t read_ticks;		/*  7 - time spent reading (ms) */

	/* Writes */
	uint64_t write;			/*  8 - writes completed */
	uint64_t write_merged;		/*  9 - writes merged */
	uint64_t write_sectors;		/* 10 - sectors written */
	uint64_t write_ticks;		/* 11 - time spent writing (ms) */

	/* Queue */
	uint64_t ios_pgr;		/* 12 - I/Os currently in progress */
	uint64_t total_ticks;		/* 13 - time spent doing I/Os (ms) */
	uint64_t rq_ticks;		/* 14 - weighted time spent doing I/Os (ms) */

	/* Discards (4.18+) */
	uint64_t discard;		/* 15 - discards completed successfully */
	uint64_t discard_merged;	/* 16 - discards merged */
	uint64_t discard_sectors;	/* 17 - sectors discarded */
	uint64_t discard_ticks;		/* 18 - time spent discarding */
};
492
493 static int proc_diskstats_read(char *buf, size_t size, off_t offset,
494 struct fuse_file_info *fi)
495 {
496 __do_free char *cg = NULL, *io_serviced_str = NULL,
497 *io_merged_str = NULL, *io_service_bytes_str = NULL,
498 *io_wait_time_str = NULL, *io_service_time_str = NULL,
499 *line = NULL;
500 __do_free void *fopen_cache = NULL;
501 __do_fclose FILE *f = NULL;
502 struct fuse_context *fc = fuse_get_context();
503 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
504 struct lxcfs_diskstats stats = {};
505 /* helper fields */
506 uint64_t read_service_time, write_service_time, discard_service_time, read_wait_time,
507 write_wait_time, discard_wait_time;
508 char *cache = d->buf;
509 size_t cache_size = d->buflen;
510 size_t linelen = 0, total_len = 0;
511 int i = 0;
512 int ret;
513
514 if (offset) {
515 int left;
516
517 if (offset > d->size)
518 return -EINVAL;
519
520 if (!d->cached)
521 return 0;
522
523 left = d->size - offset;
524 total_len = left > size ? size: left;
525 memcpy(buf, cache + offset, total_len);
526
527 return total_len;
528 }
529
530 pid_t initpid = lookup_initpid_in_store(fc->pid);
531 if (initpid <= 1 || is_shared_pidns(initpid))
532 initpid = fc->pid;
533
534 cg = get_pid_cgroup(initpid, "blkio");
535 if (!cg)
536 return read_file_fuse("/proc/diskstats", buf, size, d);
537 prune_init_slice(cg);
538
539 ret = cgroup_ops->get_io_serviced(cgroup_ops, cg, &io_serviced_str);
540 if (ret < 0) {
541 if (ret == -EOPNOTSUPP)
542 return read_file_fuse("/proc/diskstats", buf, size, d);
543 }
544
545 ret = cgroup_ops->get_io_merged(cgroup_ops, cg, &io_merged_str);
546 if (ret < 0) {
547 if (ret == -EOPNOTSUPP)
548 return read_file_fuse("/proc/diskstats", buf, size, d);
549 }
550
551 ret = cgroup_ops->get_io_service_bytes(cgroup_ops, cg, &io_service_bytes_str);
552 if (ret < 0) {
553 if (ret == -EOPNOTSUPP)
554 return read_file_fuse("/proc/diskstats", buf, size, d);
555 }
556
557 ret = cgroup_ops->get_io_wait_time(cgroup_ops, cg, &io_wait_time_str);
558 if (ret < 0) {
559 if (ret == -EOPNOTSUPP)
560 return read_file_fuse("/proc/diskstats", buf, size, d);
561 }
562
563 ret = cgroup_ops->get_io_service_time(cgroup_ops, cg, &io_service_time_str);
564 if (ret < 0) {
565 if (ret == -EOPNOTSUPP)
566 return read_file_fuse("/proc/diskstats", buf, size, d);
567 }
568
569 f = fopen_cached("/proc/diskstats", "re", &fopen_cache);
570 if (!f)
571 return 0;
572
573 while (getline(&line, &linelen, f) != -1) {
574 ssize_t l;
575 char lbuf[256];
576
577 i = sscanf(line, "%u %u %71s", &stats.major, &stats.minor, stats.dev_name);
578 if (i != 3)
579 continue;
580
581 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Read", &stats.read);
582 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Write", &stats.write);
583 get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Discard", &stats.discard);
584
585 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Read", &stats.read_merged);
586 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Write", &stats.write_merged);
587 get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Discard", &stats.discard_merged);
588
589 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Read", &stats.read_sectors);
590 stats.read_sectors = stats.read_sectors / 512;
591 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Write", &stats.write_sectors);
592 stats.write_sectors = stats.write_sectors / 512;
593 get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Discard", &stats.discard_sectors);
594 stats.discard_sectors = stats.discard_sectors / 512;
595
596 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Read", &read_service_time);
597 read_service_time = read_service_time / 1000000;
598 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Read", &read_wait_time);
599 read_wait_time = read_wait_time / 1000000;
600 stats.read_ticks = read_service_time + read_wait_time;
601
602 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Write", &write_service_time);
603 write_service_time = write_service_time / 1000000;
604 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Write", &write_wait_time);
605 write_wait_time = write_wait_time / 1000000;
606 stats.write_ticks = write_service_time + write_wait_time;
607
608 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Discard", &discard_service_time);
609 discard_service_time = discard_service_time / 1000000;
610 get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Discard", &discard_wait_time);
611 discard_wait_time = discard_wait_time / 1000000;
612 stats.discard_ticks = discard_service_time + discard_wait_time;
613
614 get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Total", &stats.total_ticks);
615 stats.total_ticks = stats.total_ticks / 1000000;
616
617 memset(lbuf, 0, 256);
618 if (stats.read || stats.write || stats.read_merged || stats.write_merged ||
619 stats.read_sectors || stats.write_sectors || stats.read_ticks ||
620 stats.write_ticks || stats.ios_pgr || stats.total_ticks || stats.rq_ticks ||
621 stats.discard_merged || stats.discard_sectors || stats.discard_ticks)
622 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
623 stats.major,
624 stats.minor,
625 stats.dev_name,
626 stats.read,
627 stats.read_merged,
628 stats.read_sectors,
629 stats.read_ticks,
630 stats.write,
631 stats.write_merged,
632 stats.write_sectors,
633 stats.write_ticks,
634 stats.ios_pgr,
635 stats.total_ticks,
636 stats.rq_ticks,
637 stats.discard_merged,
638 stats.discard_sectors,
639 stats.discard_ticks);
640 else
641 continue;
642
643 l = snprintf(cache, cache_size, "%s", lbuf);
644 if (l < 0)
645 return log_error(0, "Failed to write cache");
646 if (l >= cache_size)
647 return log_error(0, "Write to cache was truncated");
648
649 cache += l;
650 cache_size -= l;
651 total_len += l;
652 }
653
654 d->cached = 1;
655 d->size = total_len;
656 if (total_len > size)
657 total_len = size;
658 memcpy(buf, d->buf, total_len);
659
660 return total_len;
661 }
662
#if RELOADTEST
/*
 * Test hook, only built when RELOADTEST is set: creates the marker file
 * /tmp/lxcfs-iwashere. The result of mknod() is not checked.
 */
static inline void iwashere(void)
{
	(void)mknod("/tmp/lxcfs-iwashere", S_IFREG, 0644);
}
#endif
669
670 /*
671 * This function retrieves the busy time of a group of tasks by looking at
672 * cpuacct.usage. Unfortunately, this only makes sense when the container has
673 * been given it's own cpuacct cgroup. If not, this function will take the busy
674 * time of all other taks that do not actually belong to the container into
675 * account as well. If someone has a clever solution for this please send a
676 * patch!
677 */
678 static double get_reaper_busy(pid_t task)
679 {
680 __do_free char *cgroup = NULL, *usage_str = NULL;
681 uint64_t usage = 0;
682 pid_t initpid;
683
684 initpid = lookup_initpid_in_store(task);
685 if (initpid <= 0)
686 return 0;
687
688 cgroup = get_pid_cgroup(initpid, "cpuacct");
689 if (!cgroup)
690 return 0;
691 prune_init_slice(cgroup);
692
693 if (!cgroup_ops->get(cgroup_ops, "cpuacct", cgroup, "cpuacct.usage", &usage_str))
694 return 0;
695
696 if (safe_uint64(usage_str, &usage, 10) < 0)
697 lxcfs_error("Failed to convert usage %s", usage_str);
698
699 return ((double)usage / 1000000000);
700 }
701
702 static uint64_t get_reaper_start_time(pid_t pid)
703 {
704 __do_free void *fopen_cache = NULL;
705 __do_fclose FILE *f = NULL;
706 int ret;
707 uint64_t starttime;
708 char path[STRLITERALLEN("/proc/") + LXCFS_NUMSTRLEN64 +
709 STRLITERALLEN("/stat") + 1];
710 pid_t qpid;
711
712 qpid = lookup_initpid_in_store(pid);
713 if (qpid <= 0)
714 return ret_errno(EINVAL);
715
716 ret = snprintf(path, sizeof(path), "/proc/%d/stat", qpid);
717 if (ret < 0 || (size_t)ret >= sizeof(path))
718 return ret_errno(EINVAL);
719
720 f = fopen_cached(path, "re", &fopen_cache);
721 if (!f)
722 return ret_errno(EINVAL);
723
724 /* Note that the *scanf() argument supression requires that length
725 * modifiers such as "l" are omitted. Otherwise some compilers will yell
726 * at us. It's like telling someone you're not married and then asking
727 * if you can bring your wife to the party.
728 */
729 ret = fscanf(f, "%*d " /* (1) pid %d */
730 "%*s " /* (2) comm %s */
731 "%*c " /* (3) state %c */
732 "%*d " /* (4) ppid %d */
733 "%*d " /* (5) pgrp %d */
734 "%*d " /* (6) session %d */
735 "%*d " /* (7) tty_nr %d */
736 "%*d " /* (8) tpgid %d */
737 "%*u " /* (9) flags %u */
738 "%*u " /* (10) minflt %lu */
739 "%*u " /* (11) cminflt %lu */
740 "%*u " /* (12) majflt %lu */
741 "%*u " /* (13) cmajflt %lu */
742 "%*u " /* (14) utime %lu */
743 "%*u " /* (15) stime %lu */
744 "%*d " /* (16) cutime %ld */
745 "%*d " /* (17) cstime %ld */
746 "%*d " /* (18) priority %ld */
747 "%*d " /* (19) nice %ld */
748 "%*d " /* (20) num_threads %ld */
749 "%*d " /* (21) itrealvalue %ld */
750 "%" PRIu64, /* (22) starttime %llu */
751 &starttime);
752 if (ret != 1)
753 return ret_errno(EINVAL);
754
755 return ret_set_errno(starttime, 0);
756 }
757
/*
 * Start time of the init process belonging to @pid, converted from clock
 * ticks to seconds using sysconf(_SC_CLK_TCK).
 *
 * Returns 0 when the start time or the tick rate cannot be determined.
 */
static double get_reaper_start_time_in_sec(pid_t pid)
{
	uint64_t clockticks;
	int64_t ticks_per_sec;

	clockticks = get_reaper_start_time(pid);
	/*
	 * clockticks is unsigned, so "<= 0" alone would only catch 0. Looking
	 * at it as a signed value also rejects a negative error code that was
	 * passed through the unsigned return type.
	 */
	if ((int64_t)clockticks <= 0)
		return log_debug(0, "Failed to retrieve start time of pid %d", pid);

	/* A tick rate of 0 would make the division below meaningless. */
	ticks_per_sec = sysconf(_SC_CLK_TCK);
	if (ticks_per_sec <= 0)
		return log_debug(0, "Failed to determine number of clock ticks in a second");

	return (double)clockticks / (uint64_t)ticks_per_sec;
}
776
777 static double get_reaper_age(pid_t pid)
778 {
779 uint64_t uptime_ms;
780 double procstart, procage;
781
782 /*
783 * We need to substract the time the process has started since system
784 * boot minus the time when the system has started to get the actual
785 * reaper age.
786 */
787 procstart = get_reaper_start_time_in_sec(pid);
788 procage = procstart;
789 if (procstart > 0) {
790 int ret;
791 struct timespec spec;
792
793 ret = clock_gettime(CLOCK_BOOTTIME, &spec);
794 if (ret < 0)
795 return 0;
796
797 uptime_ms = (spec.tv_sec * 1000) + (spec.tv_nsec * 1e-6);
798 procage = (uptime_ms - (procstart * 1000)) / 1000;
799 }
800
801 return procage;
802 }
803
804 /*
805 * We read /proc/uptime and reuse its second field.
806 * For the first field, we use the mtime for the reaper for
807 * the calling pid as returned by getreaperage
808 */
809 static int proc_uptime_read(char *buf, size_t size, off_t offset,
810 struct fuse_file_info *fi)
811 {
812 struct fuse_context *fc = fuse_get_context();
813 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
814 char *cache = d->buf;
815 ssize_t total_len = 0, ret = 0;
816 double busytime, idletime, reaperage;
817
818 #if RELOADTEST
819 iwashere();
820 #endif
821
822 if (offset) {
823 int left;
824
825 if (offset > d->size)
826 return -EINVAL;
827
828 if (!d->cached)
829 return 0;
830
831 left = d->size - offset;
832 total_len = left > size ? size : left;
833 memcpy(buf, cache + offset, total_len);
834
835 return total_len;
836 }
837
838 reaperage = get_reaper_age(fc->pid);
839 /*
840 * To understand why this is done, please read the comment to the
841 * get_reaper_busy() function.
842 */
843 idletime = reaperage;
844 busytime = get_reaper_busy(fc->pid);
845 if (reaperage >= busytime)
846 idletime = reaperage - busytime;
847
848 ret = snprintf(d->buf, d->buflen, "%.2lf %.2lf\n", reaperage, idletime);
849 if (ret < 0 || ret >= d->buflen)
850 return read_file_fuse("/proc/uptime", buf, size, d);
851 total_len = ret;
852
853 d->cached = 1;
854 d->size = total_len;
855 if (total_len > size)
856 total_len = size;
857 memcpy(buf, d->buf, total_len);
858
859 return total_len;
860 }
861
862 #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
863 static int proc_stat_read(char *buf, size_t size, off_t offset,
864 struct fuse_file_info *fi)
865 {
866 __do_free char *cg = NULL, *cpuset = NULL, *line = NULL;
867 __do_free void *fopen_cache = NULL;
868 __do_free struct cpuacct_usage *cg_cpu_usage = NULL;
869 __do_fclose FILE *f = NULL;
870 struct fuse_context *fc = fuse_get_context();
871 struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data;
872 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
873 size_t linelen = 0, total_len = 0;
874 int curcpu = -1; /* cpu numbering starts at 0 */
875 int physcpu = 0;
876 uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0,
877 softirq = 0, steal = 0, guest = 0, guest_nice = 0;
878 uint64_t user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0,
879 iowait_sum = 0, irq_sum = 0, softirq_sum = 0, steal_sum = 0,
880 guest_sum = 0, guest_nice_sum = 0;
881 char cpuall[CPUALL_MAX_SIZE];
882 /* reserve for cpu all */
883 char *cache = d->buf + CPUALL_MAX_SIZE;
884 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
885 int cg_cpu_usage_size = 0;
886
887 if (offset) {
888 int left;
889
890 if (offset > d->size)
891 return -EINVAL;
892
893 if (!d->cached)
894 return 0;
895
896 left = d->size - offset;
897 total_len = left > size ? size : left;
898 memcpy(buf, d->buf + offset, total_len);
899
900 return total_len;
901 }
902
903 pid_t initpid = lookup_initpid_in_store(fc->pid);
904 if (initpid <= 1 || is_shared_pidns(initpid))
905 initpid = fc->pid;
906
907 /*
908 * when container run with host pid namespace initpid == 1, cgroup will "/"
909 * we should return host os's /proc contents.
910 * in some case cpuacct_usage.all in "/" will larger then /proc/stat
911 */
912 if (initpid == 1)
913 return read_file_fuse("/proc/stat", buf, size, d);
914
915 cg = get_pid_cgroup(initpid, "cpuset");
916 if (!cg)
917 return read_file_fuse("/proc/stat", buf, size, d);
918 prune_init_slice(cg);
919
920 cpuset = get_cpuset(cg);
921 if (!cpuset)
922 return 0;
923
924 f = fopen_cached("/proc/stat", "re", &fopen_cache);
925 if (!f)
926 return 0;
927
928 /* Skip first system cpu line. */
929 if (getline(&line, &linelen, f) < 0)
930 return log_error(0, "proc_stat_read read first line failed");
931
932 /*
933 * Read cpuacct.usage_all for all CPUs.
934 * If the cpuacct cgroup is present, it is used to calculate the container's
935 * CPU usage. If not, values from the host's /proc/stat are used.
936 */
937 if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage, &cg_cpu_usage_size) == 0) {
938 if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) {
939 total_len = cpuview_proc_stat(cg, cpuset, cg_cpu_usage,
940 cg_cpu_usage_size, f,
941 d->buf, d->buflen);
942 goto out;
943 }
944 } else {
945 lxcfs_v("proc_stat_read failed to read from cpuacct, falling back to the host's /proc/stat");
946 }
947
948 while (getline(&line, &linelen, f) != -1) {
949 ssize_t l;
950 char cpu_char[10]; /* That's a lot of cores */
951 char *c;
952 uint64_t all_used, cg_used, new_idle;
953 int ret;
954
955 if (strlen(line) == 0)
956 continue;
957 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
958 /* not a ^cpuN line containing a number N, just print it */
959 l = snprintf(cache, cache_size, "%s", line);
960 if (l < 0)
961 return log_error(0, "Failed to write cache");
962 if (l >= cache_size)
963 return log_error(0, "Write to cache was truncated");
964
965 cache += l;
966 cache_size -= l;
967 total_len += l;
968
969 continue;
970 }
971
972 if (sscanf(cpu_char, "%d", &physcpu) != 1)
973 continue;
974
975 if (!cpu_in_cpuset(physcpu, cpuset))
976 continue;
977
978 curcpu++;
979
980 ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
981 &user,
982 &nice,
983 &system,
984 &idle,
985 &iowait,
986 &irq,
987 &softirq,
988 &steal,
989 &guest,
990 &guest_nice);
991 if (ret != 10 || !cg_cpu_usage) {
992 c = strchr(line, ' ');
993 if (!c)
994 continue;
995
996 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
997 if (l < 0)
998 return log_error(0, "Failed to write cache");
999 if (l >= cache_size)
1000 return log_error(0, "Write to cache was truncated");
1001
1002 cache += l;
1003 cache_size -= l;
1004 total_len += l;
1005
1006 if (ret != 10)
1007 continue;
1008 }
1009
1010 if (cg_cpu_usage) {
1011 if (physcpu >= cg_cpu_usage_size)
1012 break;
1013
1014 all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
1015 cg_used = cg_cpu_usage[physcpu].user + cg_cpu_usage[physcpu].system;
1016
1017 if (all_used >= cg_used) {
1018 new_idle = idle + (all_used - cg_used);
1019
1020 } else {
1021 lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time",
1022 curcpu, cg, all_used, cg_used);
1023 new_idle = idle;
1024 }
1025
1026 l = snprintf(cache, cache_size,
1027 "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
1028 curcpu, cg_cpu_usage[physcpu].user,
1029 cg_cpu_usage[physcpu].system, new_idle);
1030 if (l < 0)
1031 return log_error(0, "Failed to write cache");
1032 if (l >= cache_size)
1033 return log_error(0, "Write to cache was truncated");
1034
1035 cache += l;
1036 cache_size -= l;
1037 total_len += l;
1038
1039 user_sum += cg_cpu_usage[physcpu].user;
1040 system_sum += cg_cpu_usage[physcpu].system;
1041 idle_sum += new_idle;
1042 } else {
1043 user_sum += user;
1044 nice_sum += nice;
1045 system_sum += system;
1046 idle_sum += idle;
1047 iowait_sum += iowait;
1048 irq_sum += irq;
1049 softirq_sum += softirq;
1050 steal_sum += steal;
1051 guest_sum += guest;
1052 guest_nice_sum += guest_nice;
1053 }
1054 }
1055
1056 cache = d->buf;
1057
1058 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
1059 user_sum,
1060 nice_sum,
1061 system_sum,
1062 idle_sum,
1063 iowait_sum,
1064 irq_sum,
1065 softirq_sum,
1066 steal_sum,
1067 guest_sum,
1068 guest_nice_sum);
1069 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE) {
1070 memcpy(cache, cpuall, cpuall_len);
1071 cache += cpuall_len;
1072 } else {
1073 /* shouldn't happen */
1074 lxcfs_error("proc_stat_read copy cpuall failed, cpuall_len=%d", cpuall_len);
1075 cpuall_len = 0;
1076 }
1077
1078 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
1079 total_len += cpuall_len;
1080
1081 out:
1082 d->cached = 1;
1083 d->size = total_len;
1084 if (total_len > size)
1085 total_len = size;
1086
1087 memcpy(buf, d->buf, total_len);
1088 return total_len;
1089 }
1090
1091 /* Note that "memory.stat" in cgroup2 is hierarchical by default. */
1092 static bool cgroup_parse_memory_stat(const char *cgroup, struct memory_stat *mstat)
1093 {
1094 __do_close int fd = -EBADF;
1095 __do_fclose FILE *f = NULL;
1096 __do_free char *line = NULL;
1097 __do_free void *fdopen_cache = NULL;
1098 bool unified;
1099 size_t len = 0;
1100 ssize_t linelen;
1101
1102 fd = cgroup_ops->get_memory_stats_fd(cgroup_ops, cgroup);
1103 if (fd < 0)
1104 return false;
1105
1106 f = fdopen_cached(fd, "re", &fdopen_cache);
1107 if (!f)
1108 return false;
1109
1110 unified = pure_unified_layout(cgroup_ops);
1111 while ((linelen = getline(&line, &len, f)) != -1) {
1112 if (!unified && startswith(line, "hierarchical_memory_limit")) {
1113 sscanf(line, "hierarchical_memory_limit %" PRIu64, &(mstat->hierarchical_memory_limit));
1114 } else if (!unified && startswith(line, "hierarchical_memsw_limit")) {
1115 sscanf(line, "hierarchical_memsw_limit %" PRIu64, &(mstat->hierarchical_memsw_limit));
1116 } else if (startswith(line, unified ? "file" :"total_cache")) {
1117 sscanf(line, unified ? "file %" PRIu64 : "total_cache %" PRIu64, &(mstat->total_cache));
1118 } else if (!unified && startswith(line, "total_rss")) {
1119 sscanf(line, "total_rss %" PRIu64, &(mstat->total_rss));
1120 } else if (!unified && startswith(line, "total_rss_huge")) {
1121 sscanf(line, "total_rss_huge %" PRIu64, &(mstat->total_rss_huge));
1122 } else if (startswith(line, unified ? "shmem" : "total_shmem")) {
1123 sscanf(line, unified ? "shmem %" PRIu64 : "total_shmem %" PRIu64, &(mstat->total_shmem));
1124 } else if (startswith(line, unified ? "file_mapped" : "total_mapped_file")) {
1125 sscanf(line, unified ? "file_mapped %" PRIu64 : "total_mapped_file %" PRIu64, &(mstat->total_mapped_file));
1126 } else if (!unified && startswith(line, "total_dirty")) {
1127 sscanf(line, "total_dirty %" PRIu64, &(mstat->total_dirty));
1128 } else if (!unified && startswith(line, "total_writeback")) {
1129 sscanf(line, "total_writeback %" PRIu64, &(mstat->total_writeback));
1130 } else if (!unified && startswith(line, "total_swap")) {
1131 sscanf(line, "total_swap %" PRIu64, &(mstat->total_swap));
1132 } else if (!unified && startswith(line, "total_pgpgin")) {
1133 sscanf(line, "total_pgpgin %" PRIu64, &(mstat->total_pgpgin));
1134 } else if (!unified && startswith(line, "total_pgpgout")) {
1135 sscanf(line, "total_pgpgout %" PRIu64, &(mstat->total_pgpgout));
1136 } else if (startswith(line, unified ? "pgfault" : "total_pgfault")) {
1137 sscanf(line, unified ? "pgfault %" PRIu64 : "total_pgfault %" PRIu64, &(mstat->total_pgfault));
1138 } else if (startswith(line, unified ? "pgmajfault" : "total_pgmajfault")) {
1139 sscanf(line, unified ? "pgmajfault %" PRIu64 : "total_pgmajfault %" PRIu64, &(mstat->total_pgmajfault));
1140 } else if (startswith(line, unified ? "inactive_anon" : "total_inactive_anon")) {
1141 sscanf(line, unified ? "inactive_anon %" PRIu64 : "total_inactive_anon %" PRIu64, &(mstat->total_inactive_anon));
1142 } else if (startswith(line, unified ? "active_anon" : "total_active_anon")) {
1143 sscanf(line, unified ? "active_anon %" PRIu64 : "total_active_anon %" PRIu64, &(mstat->total_active_anon));
1144 } else if (startswith(line, unified ? "inactive_file" : "total_inactive_file")) {
1145 sscanf(line, unified ? "inactive_file %" PRIu64 : "total_inactive_file %" PRIu64, &(mstat->total_inactive_file));
1146 } else if (startswith(line, unified ? "active_file" : "total_active_file")) {
1147 sscanf(line, unified ? "active_file %" PRIu64 : "total_active_file %" PRIu64, &(mstat->total_active_file));
1148 } else if (startswith(line, unified ? "unevictable" : "total_unevictable")) {
1149 sscanf(line, unified ? "unevictable %" PRIu64 : "total_unevictable %" PRIu64, &(mstat->total_unevictable));
1150 }
1151 }
1152
1153 return true;
1154 }
1155
1156 static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1157 struct fuse_file_info *fi)
1158 {
1159 __do_free char *cgroup = NULL, *line = NULL, *memusage_str = NULL,
1160 *memswusage_str = NULL, *memswpriority_str = NULL;
1161 __do_free void *fopen_cache = NULL;
1162 __do_fclose FILE *f = NULL;
1163 struct fuse_context *fc = fuse_get_context();
1164 struct lxcfs_opts *opts = (struct lxcfs_opts *)fuse_get_context()->private_data;
1165 bool wants_swap = opts && !opts->swap_off && liblxcfs_can_use_swap();
1166 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1167 uint64_t memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0,
1168 hosttotal = 0, swfree = 0, swusage = 0, swtotal = 0,
1169 memswpriority = 1;
1170 struct memory_stat mstat = {};
1171 size_t linelen = 0, total_len = 0;
1172 char *cache = d->buf;
1173 size_t cache_size = d->buflen;
1174 int ret;
1175
1176 if (offset) {
1177 int left;
1178
1179 if (offset > d->size)
1180 return -EINVAL;
1181
1182 if (!d->cached)
1183 return 0;
1184
1185 left = d->size - offset;
1186 total_len = left > size ? size : left;
1187 memcpy(buf, cache + offset, total_len);
1188
1189 return total_len;
1190 }
1191
1192 pid_t initpid = lookup_initpid_in_store(fc->pid);
1193 if (initpid <= 1 || is_shared_pidns(initpid))
1194 initpid = fc->pid;
1195
1196 cgroup = get_pid_cgroup(initpid, "memory");
1197 if (!cgroup)
1198 return read_file_fuse("/proc/meminfo", buf, size, d);
1199
1200 prune_init_slice(cgroup);
1201
1202 /* memory limits */
1203 ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str);
1204 if (ret < 0)
1205 return read_file_fuse("/proc/meminfo", buf, size, d);
1206
1207 if (safe_uint64(memusage_str, &memusage, 10) < 0)
1208 lxcfs_error("Failed to convert memusage %s", memusage_str);
1209
1210 if (!cgroup_parse_memory_stat(cgroup, &mstat))
1211 return read_file_fuse("/proc/meminfo", buf, size, d);
1212
1213 memlimit = get_min_memlimit(cgroup, false);
1214
1215 /*
1216 * Following values are allowed to fail, because swapaccount might be
1217 * turned off for current kernel.
1218 */
1219 if (wants_swap) {
1220 memswlimit = get_min_memlimit(cgroup, true);
1221 if (memswlimit > 0) {
1222 ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str);
1223 if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) {
1224 if (memlimit > memswlimit)
1225 swtotal = 0;
1226 else
1227 swtotal = (memswlimit - memlimit) / 1024;
1228 if (memusage > memswusage || swtotal == 0)
1229 swusage = 0;
1230 else
1231 swusage = (memswusage - memusage) / 1024;
1232 }
1233 }
1234
1235 ret = cgroup_ops->get_memory_swappiness(cgroup_ops, cgroup, &memswpriority_str);
1236 if (ret >= 0)
1237 safe_uint64(memswpriority_str, &memswpriority, 10);
1238 }
1239
1240 f = fopen_cached("/proc/meminfo", "re", &fopen_cache);
1241 if (!f)
1242 return read_file_fuse("/proc/meminfo", buf, size, d);
1243
1244 memusage /= 1024;
1245 memlimit /= 1024;
1246 while (getline(&line, &linelen, f) != -1) {
1247 ssize_t l;
1248 char *printme, lbuf[100];
1249
1250 memset(lbuf, 0, 100);
1251 if (startswith(line, "MemTotal:")) {
1252 sscanf(line+sizeof("MemTotal:")-1, "%" PRIu64, &hosttotal);
1253 if (memlimit == 0)
1254 memlimit = hosttotal;
1255
1256 if (hosttotal < memlimit)
1257 memlimit = hosttotal;
1258 snprintf(lbuf, 100, "MemTotal: %8" PRIu64 " kB\n", memlimit);
1259 printme = lbuf;
1260 } else if (startswith(line, "MemFree:")) {
1261 snprintf(lbuf, 100, "MemFree: %8" PRIu64 " kB\n", memlimit - memusage);
1262 printme = lbuf;
1263 } else if (startswith(line, "MemAvailable:")) {
1264 snprintf(lbuf, 100, "MemAvailable: %8" PRIu64 " kB\n", memlimit - memusage + mstat.total_cache / 1024);
1265 printme = lbuf;
1266 } else if (startswith(line, "SwapTotal:")) {
1267 if (wants_swap) {
1268 uint64_t hostswtotal = 0;
1269
1270 sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal);
1271
1272 /* The total amount of swap is always reported to be the
1273 lesser of the RAM+SWAP limit or the SWAP device size.
1274 This is because the kernel can swap as much as it
1275 wants and not only up to swtotal. */
1276
1277 swtotal = memlimit + swtotal;
1278 if (hostswtotal < swtotal) {
1279 swtotal = hostswtotal;
1280 }
1281
1282 /* When swappiness is 0, pretend we can't swap. */
1283 if (memswpriority == 0) {
1284 swtotal = swusage;
1285 }
1286 }
1287
1288 snprintf(lbuf, 100, "SwapTotal: %8" PRIu64 " kB\n", swtotal);
1289 printme = lbuf;
1290 } else if (startswith(line, "SwapFree:")) {
1291 if (wants_swap) {
1292 swfree = swtotal - swusage;
1293 }
1294
1295 snprintf(lbuf, 100, "SwapFree: %8" PRIu64 " kB\n", swfree);
1296 printme = lbuf;
1297 } else if (startswith(line, "Slab:")) {
1298 snprintf(lbuf, 100, "Slab: %8" PRIu64 " kB\n", (uint64_t)0);
1299 printme = lbuf;
1300 } else if (startswith(line, "Buffers:")) {
1301 snprintf(lbuf, 100, "Buffers: %8" PRIu64 " kB\n", (uint64_t)0);
1302 printme = lbuf;
1303 } else if (startswith(line, "Cached:")) {
1304 snprintf(lbuf, 100, "Cached: %8" PRIu64 " kB\n",
1305 mstat.total_cache / 1024);
1306 printme = lbuf;
1307 } else if (startswith(line, "SwapCached:")) {
1308 snprintf(lbuf, 100, "SwapCached: %8" PRIu64 " kB\n", (uint64_t)0);
1309 printme = lbuf;
1310 } else if (startswith(line, "Active:")) {
1311 snprintf(lbuf, 100, "Active: %8" PRIu64 " kB\n",
1312 (mstat.total_active_anon +
1313 mstat.total_active_file) /
1314 1024);
1315 printme = lbuf;
1316 } else if (startswith(line, "Inactive:")) {
1317 snprintf(lbuf, 100, "Inactive: %8" PRIu64 " kB\n",
1318 (mstat.total_inactive_anon +
1319 mstat.total_inactive_file) /
1320 1024);
1321 printme = lbuf;
1322 } else if (startswith(line, "Active(anon):")) {
1323 snprintf(lbuf, 100, "Active(anon): %8" PRIu64 " kB\n",
1324 mstat.total_active_anon / 1024);
1325 printme = lbuf;
1326 } else if (startswith(line, "Inactive(anon):")) {
1327 snprintf(lbuf, 100, "Inactive(anon): %8" PRIu64 " kB\n",
1328 mstat.total_inactive_anon / 1024);
1329 printme = lbuf;
1330 } else if (startswith(line, "Active(file):")) {
1331 snprintf(lbuf, 100, "Active(file): %8" PRIu64 " kB\n",
1332 mstat.total_active_file / 1024);
1333 printme = lbuf;
1334 } else if (startswith(line, "Inactive(file):")) {
1335 snprintf(lbuf, 100, "Inactive(file): %8" PRIu64 " kB\n",
1336 mstat.total_inactive_file / 1024);
1337 printme = lbuf;
1338 } else if (startswith(line, "Unevictable:")) {
1339 snprintf(lbuf, 100, "Unevictable: %8" PRIu64 " kB\n",
1340 mstat.total_unevictable / 1024);
1341 printme = lbuf;
1342 } else if (startswith(line, "Dirty:")) {
1343 snprintf(lbuf, 100, "Dirty: %8" PRIu64 " kB\n",
1344 mstat.total_dirty / 1024);
1345 printme = lbuf;
1346 } else if (startswith(line, "Writeback:")) {
1347 snprintf(lbuf, 100, "Writeback: %8" PRIu64 " kB\n",
1348 mstat.total_writeback / 1024);
1349 printme = lbuf;
1350 } else if (startswith(line, "AnonPages:")) {
1351 snprintf(lbuf, 100, "AnonPages: %8" PRIu64 " kB\n",
1352 (mstat.total_active_anon +
1353 mstat.total_inactive_anon - mstat.total_shmem) /
1354 1024);
1355 printme = lbuf;
1356 } else if (startswith(line, "Mapped:")) {
1357 snprintf(lbuf, 100, "Mapped: %8" PRIu64 " kB\n",
1358 mstat.total_mapped_file / 1024);
1359 printme = lbuf;
1360 } else if (startswith(line, "SReclaimable:")) {
1361 snprintf(lbuf, 100, "SReclaimable: %8" PRIu64 " kB\n", (uint64_t)0);
1362 printme = lbuf;
1363 } else if (startswith(line, "SUnreclaim:")) {
1364 snprintf(lbuf, 100, "SUnreclaim: %8" PRIu64 " kB\n", (uint64_t)0);
1365 printme = lbuf;
1366 } else if (startswith(line, "Shmem:")) {
1367 snprintf(lbuf, 100, "Shmem: %8" PRIu64 " kB\n",
1368 mstat.total_shmem / 1024);
1369 printme = lbuf;
1370 } else if (startswith(line, "ShmemHugePages:")) {
1371 snprintf(lbuf, 100, "ShmemHugePages: %8" PRIu64 " kB\n", (uint64_t)0);
1372 printme = lbuf;
1373 } else if (startswith(line, "ShmemPmdMapped:")) {
1374 snprintf(lbuf, 100, "ShmemPmdMapped: %8" PRIu64 " kB\n", (uint64_t)0);
1375 printme = lbuf;
1376 } else if (startswith(line, "AnonHugePages:")) {
1377 snprintf(lbuf, 100, "AnonHugePages: %8" PRIu64 " kB\n",
1378 mstat.total_rss_huge / 1024);
1379 printme = lbuf;
1380 } else {
1381 printme = line;
1382 }
1383
1384 l = snprintf(cache, cache_size, "%s", printme);
1385 if (l < 0)
1386 return log_error(0, "Failed to write cache");
1387 if (l >= cache_size)
1388 return log_error(0, "Write to cache was truncated");
1389
1390 cache += l;
1391 cache_size -= l;
1392 total_len += l;
1393 }
1394
1395 d->cached = 1;
1396 d->size = total_len;
1397 if (total_len > size)
1398 total_len = size;
1399 memcpy(buf, d->buf, total_len);
1400
1401 return total_len;
1402 }
1403
1404 static int proc_slabinfo_read(char *buf, size_t size, off_t offset,
1405 struct fuse_file_info *fi)
1406 {
1407 __do_free char *cgroup = NULL, *line = NULL;
1408 __do_free void *fopen_cache = NULL;
1409 __do_fclose FILE *f = NULL;
1410 __do_close int fd = -EBADF;
1411 struct fuse_context *fc = fuse_get_context();
1412 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1413 size_t linelen = 0, total_len = 0;
1414 char *cache = d->buf;
1415 size_t cache_size = d->buflen;
1416 pid_t initpid;
1417
1418 if (offset) {
1419 int left;
1420
1421 if (offset > d->size)
1422 return -EINVAL;
1423
1424 if (!d->cached)
1425 return 0;
1426
1427 left = d->size - offset;
1428 total_len = left > size ? size : left;
1429 memcpy(buf, cache + offset, total_len);
1430
1431 return total_len;
1432 }
1433
1434 initpid = lookup_initpid_in_store(fc->pid);
1435 if (initpid <= 1 || is_shared_pidns(initpid))
1436 initpid = fc->pid;
1437
1438 cgroup = get_pid_cgroup(initpid, "memory");
1439 if (!cgroup)
1440 return read_file_fuse("/proc/slabinfo", buf, size, d);
1441
1442 prune_init_slice(cgroup);
1443
1444 fd = cgroup_ops->get_memory_slabinfo_fd(cgroup_ops, cgroup);
1445 if (fd < 0)
1446 return read_file_fuse("/proc/slabinfo", buf, size, d);
1447
1448 f = fdopen_cached(fd, "re", &fopen_cache);
1449 if (!f)
1450 return read_file_fuse("/proc/slabinfo", buf, size, d);
1451
1452 while (getline(&line, &linelen, f) != -1) {
1453 ssize_t l = snprintf(cache, cache_size, "%s", line);
1454 if (l < 0)
1455 return log_error(0, "Failed to write cache");
1456 if (l >= cache_size)
1457 return log_error(0, "Write to cache was truncated");
1458
1459 cache += l;
1460 cache_size -= l;
1461 total_len += l;
1462 }
1463
1464 d->cached = 1;
1465 d->size = total_len;
1466 if (total_len > size)
1467 total_len = size;
1468 memcpy(buf, d->buf, total_len);
1469
1470 return total_len;
1471 }
1472
1473 __lxcfs_fuse_ops int proc_read(const char *path, char *buf, size_t size,
1474 off_t offset, struct fuse_file_info *fi)
1475 {
1476 struct file_info *f = INTTYPE_TO_PTR(fi->fh);
1477
1478 switch (f->type) {
1479 case LXC_TYPE_PROC_MEMINFO:
1480 if (liblxcfs_functional())
1481 return proc_meminfo_read(buf, size, offset, fi);
1482
1483 return read_file_fuse_with_offset(LXC_TYPE_PROC_MEMINFO_PATH,
1484 buf, size, offset, f);
1485 case LXC_TYPE_PROC_CPUINFO:
1486 if (liblxcfs_functional())
1487 return proc_cpuinfo_read(buf, size, offset, fi);
1488
1489 return read_file_fuse_with_offset(LXC_TYPE_PROC_CPUINFO_PATH,
1490 buf, size, offset, f);
1491 case LXC_TYPE_PROC_UPTIME:
1492 if (liblxcfs_functional())
1493 return proc_uptime_read(buf, size, offset, fi);
1494
1495 return read_file_fuse_with_offset(LXC_TYPE_PROC_UPTIME_PATH,
1496 buf, size, offset, f);
1497 case LXC_TYPE_PROC_STAT:
1498 if (liblxcfs_functional())
1499 return proc_stat_read(buf, size, offset, fi);
1500
1501 return read_file_fuse_with_offset(LXC_TYPE_PROC_STAT_PATH, buf,
1502 size, offset, f);
1503 case LXC_TYPE_PROC_DISKSTATS:
1504 if (liblxcfs_functional())
1505 return proc_diskstats_read(buf, size, offset, fi);
1506
1507 return read_file_fuse_with_offset(LXC_TYPE_PROC_DISKSTATS_PATH,
1508 buf, size, offset, f);
1509 case LXC_TYPE_PROC_SWAPS:
1510 if (liblxcfs_functional())
1511 return proc_swaps_read(buf, size, offset, fi);
1512
1513 return read_file_fuse_with_offset(LXC_TYPE_PROC_SWAPS_PATH, buf,
1514 size, offset, f);
1515 case LXC_TYPE_PROC_LOADAVG:
1516 if (liblxcfs_functional())
1517 return proc_loadavg_read(buf, size, offset, fi);
1518
1519 return read_file_fuse_with_offset(LXC_TYPE_PROC_LOADAVG_PATH,
1520 buf, size, offset, f);
1521 case LXC_TYPE_PROC_SLABINFO:
1522 if (liblxcfs_functional())
1523 return proc_slabinfo_read(buf, size, offset, fi);
1524
1525 return read_file_fuse_with_offset(LXC_TYPE_PROC_SLABINFO_PATH,
1526 buf, size, offset, f);
1527 }
1528
1529 return -EINVAL;
1530 }