]>
Commit | Line | Data |
---|---|---|
db0463bf | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
1f5596dd CB |
2 | |
3 | #ifndef _GNU_SOURCE | |
4 | #define _GNU_SOURCE | |
5 | #endif | |
6 | ||
7 | #ifndef FUSE_USE_VERSION | |
8 | #define FUSE_USE_VERSION 26 | |
9 | #endif | |
10 | ||
11 | #define _FILE_OFFSET_BITS 64 | |
12 | ||
13 | #define __STDC_FORMAT_MACROS | |
14 | #include <dirent.h> | |
15 | #include <errno.h> | |
16 | #include <fcntl.h> | |
17 | #include <fuse.h> | |
18 | #include <inttypes.h> | |
19 | #include <libgen.h> | |
20 | #include <pthread.h> | |
21 | #include <sched.h> | |
22 | #include <stdarg.h> | |
23 | #include <stdbool.h> | |
24 | #include <stdint.h> | |
25 | #include <stdio.h> | |
26 | #include <stdlib.h> | |
27 | #include <string.h> | |
28 | #include <time.h> | |
29 | #include <unistd.h> | |
30 | #include <wait.h> | |
31 | #include <linux/magic.h> | |
32 | #include <linux/sched.h> | |
33 | #include <sys/epoll.h> | |
34 | #include <sys/mman.h> | |
35 | #include <sys/mount.h> | |
36 | #include <sys/param.h> | |
37 | #include <sys/socket.h> | |
38 | #include <sys/syscall.h> | |
39 | #include <sys/sysinfo.h> | |
40 | #include <sys/vfs.h> | |
41 | ||
42 | #include "bindings.h" | |
43 | #include "config.h" | |
44 | #include "cgroup_fuse.h" | |
45 | #include "cgroups/cgroup.h" | |
46 | #include "cgroups/cgroup_utils.h" | |
4ec5c9da | 47 | #include "cpuset_parse.h" |
1f5596dd CB |
48 | #include "memory_utils.h" |
49 | #include "proc_loadavg.h" | |
4ec5c9da | 50 | #include "proc_cpuview.h" |
1f5596dd CB |
51 | #include "utils.h" |
52 | ||
acff9786 CB |
/*
 * Collection of 64-bit memory accounting counters.
 *
 * NOTE(review): the field names presumably mirror the keys of a cgroup
 * memory.stat file - confirm against the code that fills this struct.
 */
struct memory_stat {
	/* Hierarchical limits. */
	uint64_t hierarchical_memory_limit;
	uint64_t hierarchical_memsw_limit;

	/* Usage totals. */
	uint64_t total_cache;
	uint64_t total_rss;
	uint64_t total_rss_huge;
	uint64_t total_shmem;
	uint64_t total_mapped_file;
	uint64_t total_dirty;
	uint64_t total_writeback;
	uint64_t total_swap;

	/* Paging event counters. */
	uint64_t total_pgpgin;
	uint64_t total_pgpgout;
	uint64_t total_pgfault;
	uint64_t total_pgmajfault;

	/* Per-list totals. */
	uint64_t total_inactive_anon;
	uint64_t total_active_anon;
	uint64_t total_inactive_file;
	uint64_t total_active_file;
	uint64_t total_unevictable;
};
74 | ||
0d894438 | 75 | __lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb) |
1f5596dd CB |
76 | { |
77 | struct timespec now; | |
78 | ||
79 | memset(sb, 0, sizeof(struct stat)); | |
80 | if (clock_gettime(CLOCK_REALTIME, &now) < 0) | |
81 | return -EINVAL; | |
f75d5b75 | 82 | |
1f5596dd CB |
83 | sb->st_uid = sb->st_gid = 0; |
84 | sb->st_atim = sb->st_mtim = sb->st_ctim = now; | |
85 | if (strcmp(path, "/proc") == 0) { | |
86 | sb->st_mode = S_IFDIR | 00555; | |
87 | sb->st_nlink = 2; | |
88 | return 0; | |
89 | } | |
f75d5b75 CB |
90 | |
91 | if (strcmp(path, "/proc/meminfo") == 0 || | |
92 | strcmp(path, "/proc/cpuinfo") == 0 || | |
93 | strcmp(path, "/proc/uptime") == 0 || | |
94 | strcmp(path, "/proc/stat") == 0 || | |
95 | strcmp(path, "/proc/diskstats") == 0 || | |
96 | strcmp(path, "/proc/swaps") == 0 || | |
97 | strcmp(path, "/proc/loadavg") == 0) { | |
1f5596dd CB |
98 | sb->st_size = 0; |
99 | sb->st_mode = S_IFREG | 00444; | |
100 | sb->st_nlink = 1; | |
101 | return 0; | |
102 | } | |
103 | ||
104 | return -ENOENT; | |
105 | } | |
106 | ||
0d894438 CB |
107 | __lxcfs_fuse_ops int proc_readdir(const char *path, void *buf, |
108 | fuse_fill_dir_t filler, off_t offset, | |
109 | struct fuse_file_info *fi) | |
1f5596dd CB |
110 | { |
111 | if (filler(buf, ".", NULL, 0) != 0 || | |
112 | filler(buf, "..", NULL, 0) != 0 || | |
113 | filler(buf, "cpuinfo", NULL, 0) != 0 || | |
114 | filler(buf, "meminfo", NULL, 0) != 0 || | |
115 | filler(buf, "stat", NULL, 0) != 0 || | |
116 | filler(buf, "uptime", NULL, 0) != 0 || | |
117 | filler(buf, "diskstats", NULL, 0) != 0 || | |
118 | filler(buf, "swaps", NULL, 0) != 0 || | |
119 | filler(buf, "loadavg", NULL, 0) != 0) | |
120 | return -EINVAL; | |
121 | ||
122 | return 0; | |
123 | } | |
124 | ||
f75d5b75 | 125 | static off_t get_procfile_size(const char *path) |
1f5596dd | 126 | { |
f75d5b75 CB |
127 | __do_fclose FILE *f = NULL; |
128 | __do_free char *line = NULL; | |
1f5596dd CB |
129 | size_t len = 0; |
130 | ssize_t sz, answer = 0; | |
f75d5b75 CB |
131 | |
132 | f = fopen(path, "re"); | |
1f5596dd CB |
133 | if (!f) |
134 | return 0; | |
135 | ||
136 | while ((sz = getline(&line, &len, f)) != -1) | |
137 | answer += sz; | |
1f5596dd CB |
138 | |
139 | return answer; | |
140 | } | |
141 | ||
0d894438 | 142 | __lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi) |
1f5596dd | 143 | { |
700dd417 | 144 | __do_free struct file_info *info = NULL; |
1f5596dd | 145 | int type = -1; |
1f5596dd CB |
146 | |
147 | if (strcmp(path, "/proc/meminfo") == 0) | |
148 | type = LXC_TYPE_PROC_MEMINFO; | |
149 | else if (strcmp(path, "/proc/cpuinfo") == 0) | |
150 | type = LXC_TYPE_PROC_CPUINFO; | |
151 | else if (strcmp(path, "/proc/uptime") == 0) | |
152 | type = LXC_TYPE_PROC_UPTIME; | |
153 | else if (strcmp(path, "/proc/stat") == 0) | |
154 | type = LXC_TYPE_PROC_STAT; | |
155 | else if (strcmp(path, "/proc/diskstats") == 0) | |
156 | type = LXC_TYPE_PROC_DISKSTATS; | |
157 | else if (strcmp(path, "/proc/swaps") == 0) | |
158 | type = LXC_TYPE_PROC_SWAPS; | |
159 | else if (strcmp(path, "/proc/loadavg") == 0) | |
160 | type = LXC_TYPE_PROC_LOADAVG; | |
161 | if (type == -1) | |
162 | return -ENOENT; | |
163 | ||
92cd8639 | 164 | info = zalloc(sizeof(*info)); |
1f5596dd CB |
165 | if (!info) |
166 | return -ENOMEM; | |
167 | ||
1f5596dd CB |
168 | info->type = type; |
169 | ||
170 | info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE; | |
171 | ||
92cd8639 | 172 | info->buf = zalloc(info->buflen); |
1f5596dd CB |
173 | if (!info->buf) |
174 | return -ENOMEM; | |
1f5596dd CB |
175 | /* set actual size to buffer size */ |
176 | info->size = info->buflen; | |
177 | ||
700dd417 | 178 | fi->fh = PTR_TO_UINT64(move_ptr(info)); |
1f5596dd CB |
179 | return 0; |
180 | } | |
181 | ||
0d894438 | 182 | __lxcfs_fuse_ops int proc_access(const char *path, int mask) |
1f5596dd CB |
183 | { |
184 | if (strcmp(path, "/proc") == 0 && access(path, R_OK) == 0) | |
185 | return 0; | |
186 | ||
187 | /* these are all read-only */ | |
188 | if ((mask & ~R_OK) != 0) | |
189 | return -EACCES; | |
f75d5b75 | 190 | |
1f5596dd CB |
191 | return 0; |
192 | } | |
193 | ||
0d894438 | 194 | __lxcfs_fuse_ops int proc_release(const char *path, struct fuse_file_info *fi) |
1f5596dd CB |
195 | { |
196 | do_release_file_info(fi); | |
197 | return 0; | |
198 | } | |
199 | ||
caac037d | 200 | static uint64_t get_memlimit(const char *cgroup, bool swap) |
1f5596dd | 201 | { |
1f5596dd | 202 | __do_free char *memlimit_str = NULL; |
2384f2a8 | 203 | uint64_t memlimit = 0; |
f75d5b75 | 204 | int ret; |
1f5596dd CB |
205 | |
206 | if (swap) | |
207 | ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cgroup, &memlimit_str); | |
208 | else | |
209 | ret = cgroup_ops->get_memory_max(cgroup_ops, cgroup, &memlimit_str); | |
7dddc7f3 | 210 | if (ret > 0 && memlimit_str[0] && safe_uint64(memlimit_str, &memlimit, 10) < 0) |
2384f2a8 | 211 | lxcfs_error("Failed to convert memlimit %s", memlimit_str); |
1f5596dd CB |
212 | |
213 | return memlimit; | |
214 | } | |
215 | ||
caac037d | 216 | static uint64_t get_min_memlimit(const char *cgroup, bool swap) |
1f5596dd CB |
217 | { |
218 | __do_free char *copy = NULL; | |
2384f2a8 | 219 | uint64_t memlimit = 0, retlimit = 0; |
1f5596dd CB |
220 | |
221 | copy = strdup(cgroup); | |
f75d5b75 CB |
222 | if (!copy) |
223 | return log_error_errno(0, ENOMEM, "Failed to allocate memory"); | |
224 | ||
1f5596dd CB |
225 | retlimit = get_memlimit(copy, swap); |
226 | ||
227 | while (strcmp(copy, "/") != 0) { | |
228 | char *it = copy; | |
229 | ||
230 | it = dirname(it); | |
231 | memlimit = get_memlimit(it, swap); | |
2384f2a8 | 232 | if (memlimit > 0 && memlimit < retlimit) |
1f5596dd CB |
233 | retlimit = memlimit; |
234 | }; | |
235 | ||
236 | return retlimit; | |
237 | } | |
238 | ||
/* Return true when @line begins with @pref; an empty @pref always matches. */
static inline bool startswith(const char *line, const char *pref)
{
	while (*pref != '\0') {
		/* Also stops at the end of line: '\0' never equals *pref here. */
		if (*line++ != *pref++)
			return false;
	}

	return true;
}
243 | ||
1f5596dd CB |
244 | static int proc_swaps_read(char *buf, size_t size, off_t offset, |
245 | struct fuse_file_info *fi) | |
246 | { | |
9ec76fc9 | 247 | __do_free char *cgroup = NULL, *memusage_str = NULL, *memswusage_str = NULL; |
1f5596dd | 248 | struct fuse_context *fc = fuse_get_context(); |
5e3ca4d3 | 249 | struct lxcfs_opts *opts = (struct lxcfs_opts *)fuse_get_context()->private_data; |
a6ab8435 | 250 | bool wants_swap = opts && !opts->swap_off && liblxcfs_can_use_swap(); |
99b183fb | 251 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
caac037d | 252 | uint64_t memswlimit = 0, memlimit = 0, memusage = 0, memswusage = 0, |
37f75e57 | 253 | swtotal = 0, swfree = 0, swusage = 0; |
1f5596dd CB |
254 | ssize_t total_len = 0; |
255 | ssize_t l = 0; | |
256 | char *cache = d->buf; | |
257 | int ret; | |
258 | ||
259 | if (offset) { | |
260 | int left; | |
261 | ||
262 | if (offset > d->size) | |
263 | return -EINVAL; | |
264 | ||
265 | if (!d->cached) | |
266 | return 0; | |
267 | ||
268 | left = d->size - offset; | |
269 | total_len = left > size ? size: left; | |
270 | memcpy(buf, cache + offset, total_len); | |
271 | ||
272 | return total_len; | |
273 | } | |
274 | ||
275 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
276 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
277 | initpid = fc->pid; | |
a9f0d623 | 278 | |
37f75e57 CB |
279 | cgroup = get_pid_cgroup(initpid, "memory"); |
280 | if (!cgroup) | |
1f5596dd | 281 | return read_file_fuse("/proc/swaps", buf, size, d); |
37f75e57 | 282 | prune_init_slice(cgroup); |
1f5596dd | 283 | |
37f75e57 | 284 | memlimit = get_min_memlimit(cgroup, false); |
1f5596dd | 285 | |
37f75e57 | 286 | ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str); |
1f5596dd CB |
287 | if (ret < 0) |
288 | return 0; | |
289 | ||
2384f2a8 CB |
290 | if (safe_uint64(memusage_str, &memusage, 10) < 0) |
291 | lxcfs_error("Failed to convert memusage %s", memusage_str); | |
1f5596dd | 292 | |
5e3ca4d3 | 293 | if (wants_swap) { |
37f75e57 CB |
294 | memswlimit = get_min_memlimit(cgroup, true); |
295 | if (memswlimit > 0) { | |
296 | ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str); | |
297 | if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) { | |
298 | if (memlimit > memswlimit) | |
299 | swtotal = 0; | |
300 | else | |
301 | swtotal = (memswlimit - memlimit) / 1024; | |
302 | if (memusage > memswusage || swtotal == 0) | |
303 | swusage = 0; | |
304 | else | |
305 | swusage = (memswusage - memusage) / 1024; | |
306 | if (swtotal >= swusage) | |
307 | swfree = swtotal - swusage; | |
308 | } | |
5e3ca4d3 | 309 | } |
1f5596dd CB |
310 | } |
311 | ||
312 | total_len = snprintf(d->buf, d->size, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); | |
313 | ||
314 | /* When no mem + swap limit is specified or swapaccount=0*/ | |
315 | if (!memswlimit) { | |
316 | __do_free char *line = NULL; | |
757a63e7 | 317 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
318 | __do_fclose FILE *f = NULL; |
319 | size_t linelen = 0; | |
320 | ||
757a63e7 | 321 | f = fopen_cached("/proc/meminfo", "re", &fopen_cache); |
1f5596dd CB |
322 | if (!f) |
323 | return 0; | |
324 | ||
325 | while (getline(&line, &linelen, f) != -1) { | |
326 | if (startswith(line, "SwapTotal:")) | |
37f75e57 | 327 | sscanf(line, "SwapTotal: %8" PRIu64 " kB", &swtotal); |
1f5596dd | 328 | else if (startswith(line, "SwapFree:")) |
37f75e57 | 329 | sscanf(line, "SwapFree: %8" PRIu64 " kB", &swfree); |
1f5596dd CB |
330 | } |
331 | } | |
332 | ||
37f75e57 | 333 | if (swtotal > 0) { |
1f5596dd | 334 | l = snprintf(d->buf + total_len, d->size - total_len, |
caac037d | 335 | "none%*svirtual\t\t%" PRIu64 "\t%" PRIu64 "\t0\n", |
37f75e57 | 336 | 36, " ", swtotal, swfree); |
1f5596dd CB |
337 | total_len += l; |
338 | } | |
339 | ||
f75d5b75 CB |
340 | if (total_len < 0 || l < 0) |
341 | return log_error(0, "Failed writing to cache"); | |
1f5596dd CB |
342 | |
343 | d->cached = 1; | |
344 | d->size = (int)total_len; | |
345 | ||
f75d5b75 CB |
346 | if (total_len > size) |
347 | total_len = size; | |
1f5596dd | 348 | memcpy(buf, d->buf, total_len); |
f75d5b75 | 349 | |
1f5596dd CB |
350 | return total_len; |
351 | } | |
352 | ||
/*
 * Look up one counter in a blkio-style statistics blob.
 *
 * @str:    newline-separated lines of the form "<major>:<minor> <iotype> <value>";
 *          may be NULL, which is treated as "no data"
 * @major:  device major number to match
 * @minor:  device minor number to match
 * @iotype: operation name to match, e.g. "Read", "Write" or "Total"
 * @v:      receives the value of the first matching line, or 0 when nothing
 *          matches or the value cannot be parsed
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor,
			       char *iotype, uint64_t *v)
{
	char key[32];
	size_t len;

	*v = 0;

	/* The caller's buffer stays NULL when the cgroup read failed. */
	if (!str)
		return;

	memset(key, 0, sizeof(key));
	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	len = strlen(key);

	while (*str) {
		/* Prefix comparison, kept local so the function is self-contained. */
		if (strncmp(str, key, len) == 0) {
			/* SCNu64 matches uint64_t on every ABI; "%lu" does not. */
			sscanf(str + len, "%" SCNu64, v);
			return;
		}

		str = strchr(str, '\n');
		if (!str)
			return;
		str++;
	}
}
376 | ||
377 | static int proc_diskstats_read(char *buf, size_t size, off_t offset, | |
378 | struct fuse_file_info *fi) | |
379 | { | |
380 | __do_free char *cg = NULL, *io_serviced_str = NULL, | |
381 | *io_merged_str = NULL, *io_service_bytes_str = NULL, | |
382 | *io_wait_time_str = NULL, *io_service_time_str = NULL, | |
383 | *line = NULL; | |
757a63e7 | 384 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
385 | __do_fclose FILE *f = NULL; |
386 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 387 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
0553c566 CB |
388 | uint64_t read = 0, write = 0; |
389 | uint64_t read_merged = 0, write_merged = 0; | |
390 | uint64_t read_sectors = 0, write_sectors = 0; | |
391 | uint64_t read_ticks = 0, write_ticks = 0; | |
392 | uint64_t ios_pgr = 0, tot_ticks = 0, rq_ticks = 0; | |
393 | uint64_t rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0; | |
1f5596dd CB |
394 | char *cache = d->buf; |
395 | size_t cache_size = d->buflen; | |
396 | size_t linelen = 0, total_len = 0; | |
397 | unsigned int major = 0, minor = 0; | |
398 | int i = 0; | |
399 | int ret; | |
400 | char dev_name[72]; | |
401 | ||
287555b8 | 402 | if (offset) { |
1f5596dd CB |
403 | int left; |
404 | ||
405 | if (offset > d->size) | |
406 | return -EINVAL; | |
407 | ||
408 | if (!d->cached) | |
409 | return 0; | |
410 | ||
411 | left = d->size - offset; | |
412 | total_len = left > size ? size: left; | |
413 | memcpy(buf, cache + offset, total_len); | |
414 | ||
415 | return total_len; | |
416 | } | |
417 | ||
418 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
419 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
420 | initpid = fc->pid; | |
a9f0d623 | 421 | |
1f5596dd CB |
422 | cg = get_pid_cgroup(initpid, "blkio"); |
423 | if (!cg) | |
424 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
425 | prune_init_slice(cg); | |
426 | ||
427 | ret = cgroup_ops->get_io_serviced(cgroup_ops, cg, &io_serviced_str); | |
428 | if (ret < 0) { | |
429 | if (ret == -EOPNOTSUPP) | |
430 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
431 | } | |
432 | ||
433 | ret = cgroup_ops->get_io_merged(cgroup_ops, cg, &io_merged_str); | |
434 | if (ret < 0) { | |
435 | if (ret == -EOPNOTSUPP) | |
436 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
437 | } | |
438 | ||
439 | ret = cgroup_ops->get_io_service_bytes(cgroup_ops, cg, &io_service_bytes_str); | |
440 | if (ret < 0) { | |
441 | if (ret == -EOPNOTSUPP) | |
442 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
443 | } | |
444 | ||
445 | ret = cgroup_ops->get_io_wait_time(cgroup_ops, cg, &io_wait_time_str); | |
446 | if (ret < 0) { | |
447 | if (ret == -EOPNOTSUPP) | |
448 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
449 | } | |
450 | ||
451 | ret = cgroup_ops->get_io_service_time(cgroup_ops, cg, &io_service_time_str); | |
452 | if (ret < 0) { | |
453 | if (ret == -EOPNOTSUPP) | |
454 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
455 | } | |
456 | ||
757a63e7 | 457 | f = fopen_cached("/proc/diskstats", "re", &fopen_cache); |
1f5596dd CB |
458 | if (!f) |
459 | return 0; | |
460 | ||
461 | while (getline(&line, &linelen, f) != -1) { | |
462 | ssize_t l; | |
463 | char lbuf[256]; | |
464 | ||
465 | i = sscanf(line, "%u %u %71s", &major, &minor, dev_name); | |
466 | if (i != 3) | |
467 | continue; | |
468 | ||
469 | get_blkio_io_value(io_serviced_str, major, minor, "Read", &read); | |
470 | get_blkio_io_value(io_serviced_str, major, minor, "Write", &write); | |
471 | get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged); | |
472 | get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged); | |
473 | get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors); | |
474 | read_sectors = read_sectors/512; | |
475 | get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors); | |
476 | write_sectors = write_sectors/512; | |
477 | ||
478 | get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm); | |
479 | rd_svctm = rd_svctm/1000000; | |
480 | get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait); | |
481 | rd_wait = rd_wait/1000000; | |
482 | read_ticks = rd_svctm + rd_wait; | |
483 | ||
484 | get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm); | |
485 | wr_svctm = wr_svctm/1000000; | |
486 | get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait); | |
487 | wr_wait = wr_wait/1000000; | |
488 | write_ticks = wr_svctm + wr_wait; | |
489 | ||
490 | get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks); | |
491 | tot_ticks = tot_ticks/1000000; | |
492 | ||
493 | memset(lbuf, 0, 256); | |
494 | if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) | |
495 | snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", | |
496 | major, minor, dev_name, read, read_merged, read_sectors, read_ticks, | |
497 | write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks); | |
498 | else | |
499 | continue; | |
500 | ||
501 | l = snprintf(cache, cache_size, "%s", lbuf); | |
f75d5b75 CB |
502 | if (l < 0) |
503 | return log_error(0, "Failed to write cache"); | |
504 | if (l >= cache_size) | |
505 | return log_error(0, "Write to cache was truncated"); | |
506 | ||
1f5596dd CB |
507 | cache += l; |
508 | cache_size -= l; | |
509 | total_len += l; | |
510 | } | |
511 | ||
512 | d->cached = 1; | |
513 | d->size = total_len; | |
287555b8 CB |
514 | if (total_len > size) |
515 | total_len = size; | |
1f5596dd CB |
516 | memcpy(buf, d->buf, total_len); |
517 | ||
518 | return total_len; | |
519 | } | |
520 | ||
#if RELOADTEST
/*
 * Leave a marker file behind so that a test can tell this code was executed.
 * The result is deliberately ignored: the marker is best-effort.
 */
static inline void iwashere(void)
{
	/* mknod(path, mode, dev): the permission bits belong in mode, not dev. */
	mknod("/tmp/lxcfs-iwashere", S_IFREG | 0644, 0);
}
#endif
527 | ||
4bf37888 CB |
528 | /* |
529 | * This function retrieves the busy time of a group of tasks by looking at | |
1f5596dd CB |
530 | * cpuacct.usage. Unfortunately, this only makes sense when the container has |
531 | * been given it's own cpuacct cgroup. If not, this function will take the busy | |
532 | * time of all other taks that do not actually belong to the container into | |
533 | * account as well. If someone has a clever solution for this please send a | |
534 | * patch! | |
535 | */ | |
536 | static double get_reaper_busy(pid_t task) | |
537 | { | |
538 | __do_free char *cgroup = NULL, *usage_str = NULL; | |
caac037d | 539 | uint64_t usage = 0; |
1f5596dd CB |
540 | pid_t initpid; |
541 | ||
542 | initpid = lookup_initpid_in_store(task); | |
543 | if (initpid <= 0) | |
544 | return 0; | |
545 | ||
546 | cgroup = get_pid_cgroup(initpid, "cpuacct"); | |
547 | if (!cgroup) | |
548 | return 0; | |
549 | prune_init_slice(cgroup); | |
4bf37888 | 550 | |
caac037d | 551 | if (!cgroup_ops->get(cgroup_ops, "cpuacct", cgroup, "cpuacct.usage", &usage_str)) |
1f5596dd CB |
552 | return 0; |
553 | ||
2384f2a8 CB |
554 | if (safe_uint64(usage_str, &usage, 10) < 0) |
555 | lxcfs_error("Failed to convert usage %s", usage_str); | |
556 | ||
1f5596dd CB |
557 | return ((double)usage / 1000000000); |
558 | } | |
559 | ||
560 | static uint64_t get_reaper_start_time(pid_t pid) | |
561 | { | |
757a63e7 | 562 | __do_free void *fopen_cache = NULL; |
12a60884 | 563 | __do_fclose FILE *f = NULL; |
1f5596dd | 564 | int ret; |
1f5596dd | 565 | uint64_t starttime; |
4bf37888 CB |
566 | char path[STRLITERALLEN("/proc/") + LXCFS_NUMSTRLEN64 + |
567 | STRLITERALLEN("/stat") + 1]; | |
1f5596dd CB |
568 | pid_t qpid; |
569 | ||
570 | qpid = lookup_initpid_in_store(pid); | |
4bf37888 CB |
571 | if (qpid <= 0) |
572 | return ret_errno(EINVAL); | |
1f5596dd | 573 | |
4bf37888 CB |
574 | ret = snprintf(path, sizeof(path), "/proc/%d/stat", qpid); |
575 | if (ret < 0 || (size_t)ret >= sizeof(path)) | |
576 | return ret_errno(EINVAL); | |
1f5596dd | 577 | |
757a63e7 | 578 | f = fopen_cached(path, "re", &fopen_cache); |
4bf37888 CB |
579 | if (!f) |
580 | return ret_errno(EINVAL); | |
1f5596dd CB |
581 | |
582 | /* Note that the *scanf() argument supression requires that length | |
583 | * modifiers such as "l" are omitted. Otherwise some compilers will yell | |
584 | * at us. It's like telling someone you're not married and then asking | |
585 | * if you can bring your wife to the party. | |
586 | */ | |
587 | ret = fscanf(f, "%*d " /* (1) pid %d */ | |
588 | "%*s " /* (2) comm %s */ | |
589 | "%*c " /* (3) state %c */ | |
590 | "%*d " /* (4) ppid %d */ | |
591 | "%*d " /* (5) pgrp %d */ | |
592 | "%*d " /* (6) session %d */ | |
593 | "%*d " /* (7) tty_nr %d */ | |
594 | "%*d " /* (8) tpgid %d */ | |
595 | "%*u " /* (9) flags %u */ | |
596 | "%*u " /* (10) minflt %lu */ | |
597 | "%*u " /* (11) cminflt %lu */ | |
598 | "%*u " /* (12) majflt %lu */ | |
599 | "%*u " /* (13) cmajflt %lu */ | |
600 | "%*u " /* (14) utime %lu */ | |
601 | "%*u " /* (15) stime %lu */ | |
602 | "%*d " /* (16) cutime %ld */ | |
603 | "%*d " /* (17) cstime %ld */ | |
604 | "%*d " /* (18) priority %ld */ | |
605 | "%*d " /* (19) nice %ld */ | |
606 | "%*d " /* (20) num_threads %ld */ | |
607 | "%*d " /* (21) itrealvalue %ld */ | |
608 | "%" PRIu64, /* (22) starttime %llu */ | |
609 | &starttime); | |
12a60884 | 610 | if (ret != 1) |
4bf37888 | 611 | return ret_errno(EINVAL); |
1f5596dd | 612 | |
12a60884 | 613 | return ret_set_errno(starttime, 0); |
1f5596dd CB |
614 | } |
615 | ||
/*
 * Convert the start time reported by get_reaper_start_time() for @pid from
 * clock ticks to seconds.
 *
 * Returns the start time in seconds, or 0 when the start time or the number
 * of clock ticks per second cannot be determined.
 */
static double get_reaper_start_time_in_sec(pid_t pid)
{
	uint64_t clockticks;
	long ticks_per_sec;

	clockticks = get_reaper_start_time(pid);
	/*
	 * get_reaper_start_time() reports failure through ret_errno() while
	 * returning an unsigned type. Assuming ret_errno() yields a negative
	 * errno (TODO confirm), the failure arrives here as a huge unsigned
	 * value, so the check has to look at the signed interpretation.
	 */
	if ((int64_t)clockticks <= 0)
		return log_debug(0, "Failed to retrieve start time of pid %d", pid);

	ticks_per_sec = sysconf(_SC_CLK_TCK);
	/* A zero tick rate is rejected as well to avoid dividing by it. */
	if (ticks_per_sec <= 0)
		return log_debug(0, "Failed to determine number of clock ticks in a second");

	return (double)clockticks / (uint64_t)ticks_per_sec;
}
634 | ||
635 | static double get_reaper_age(pid_t pid) | |
636 | { | |
637 | uint64_t uptime_ms; | |
638 | double procstart, procage; | |
639 | ||
4bf37888 CB |
640 | /* |
641 | * We need to substract the time the process has started since system | |
1f5596dd CB |
642 | * boot minus the time when the system has started to get the actual |
643 | * reaper age. | |
644 | */ | |
645 | procstart = get_reaper_start_time_in_sec(pid); | |
646 | procage = procstart; | |
647 | if (procstart > 0) { | |
648 | int ret; | |
649 | struct timespec spec; | |
650 | ||
651 | ret = clock_gettime(CLOCK_BOOTTIME, &spec); | |
652 | if (ret < 0) | |
653 | return 0; | |
654 | ||
1f5596dd CB |
655 | uptime_ms = (spec.tv_sec * 1000) + (spec.tv_nsec * 1e-6); |
656 | procage = (uptime_ms - (procstart * 1000)) / 1000; | |
657 | } | |
658 | ||
659 | return procage; | |
660 | } | |
661 | ||
662 | /* | |
663 | * We read /proc/uptime and reuse its second field. | |
664 | * For the first field, we use the mtime for the reaper for | |
665 | * the calling pid as returned by getreaperage | |
666 | */ | |
667 | static int proc_uptime_read(char *buf, size_t size, off_t offset, | |
668 | struct fuse_file_info *fi) | |
669 | { | |
670 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 671 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd | 672 | char *cache = d->buf; |
03a51c63 | 673 | ssize_t total_len = 0, ret = 0; |
a7ec61a4 | 674 | double busytime, idletime, reaperage; |
1f5596dd CB |
675 | |
676 | #if RELOADTEST | |
677 | iwashere(); | |
678 | #endif | |
679 | ||
f75d5b75 CB |
680 | if (offset) { |
681 | int left; | |
682 | ||
1f5596dd CB |
683 | if (offset > d->size) |
684 | return -EINVAL; | |
f75d5b75 | 685 | |
03a51c63 CB |
686 | if (!d->cached) |
687 | return 0; | |
688 | ||
f75d5b75 CB |
689 | left = d->size - offset; |
690 | total_len = left > size ? size : left; | |
1f5596dd | 691 | memcpy(buf, cache + offset, total_len); |
f75d5b75 | 692 | |
1f5596dd CB |
693 | return total_len; |
694 | } | |
695 | ||
696 | reaperage = get_reaper_age(fc->pid); | |
f75d5b75 CB |
697 | /* |
698 | * To understand why this is done, please read the comment to the | |
1f5596dd CB |
699 | * get_reaper_busy() function. |
700 | */ | |
701 | idletime = reaperage; | |
a7ec61a4 | 702 | busytime = get_reaper_busy(fc->pid); |
1f5596dd CB |
703 | if (reaperage >= busytime) |
704 | idletime = reaperage - busytime; | |
705 | ||
03a51c63 CB |
706 | ret = snprintf(d->buf, d->buflen, "%.2lf %.2lf\n", reaperage, idletime); |
707 | if (ret < 0 || ret >= d->buflen) | |
4bf37888 | 708 | return read_file_fuse("/proc/uptime", buf, size, d); |
03a51c63 | 709 | total_len = ret; |
1f5596dd | 710 | |
1f5596dd | 711 | d->cached = 1; |
03a51c63 | 712 | d->size = total_len; |
f75d5b75 CB |
713 | if (total_len > size) |
714 | total_len = size; | |
1f5596dd | 715 | memcpy(buf, d->buf, total_len); |
4bf37888 | 716 | |
1f5596dd CB |
717 | return total_len; |
718 | } | |
719 | ||
720 | #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2) | |
721 | static int proc_stat_read(char *buf, size_t size, off_t offset, | |
722 | struct fuse_file_info *fi) | |
723 | { | |
724 | __do_free char *cg = NULL, *cpuset = NULL, *line = NULL; | |
757a63e7 | 725 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
726 | __do_free struct cpuacct_usage *cg_cpu_usage = NULL; |
727 | __do_fclose FILE *f = NULL; | |
728 | struct fuse_context *fc = fuse_get_context(); | |
8044f626 | 729 | struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data; |
99b183fb | 730 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
731 | size_t linelen = 0, total_len = 0; |
732 | int curcpu = -1; /* cpu numbering starts at 0 */ | |
733 | int physcpu = 0; | |
0553c566 CB |
734 | uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, |
735 | softirq = 0, steal = 0, guest = 0, guest_nice = 0; | |
736 | uint64_t user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, | |
737 | iowait_sum = 0, irq_sum = 0, softirq_sum = 0, steal_sum = 0, | |
738 | guest_sum = 0, guest_nice_sum = 0; | |
1f5596dd CB |
739 | char cpuall[CPUALL_MAX_SIZE]; |
740 | /* reserve for cpu all */ | |
741 | char *cache = d->buf + CPUALL_MAX_SIZE; | |
742 | size_t cache_size = d->buflen - CPUALL_MAX_SIZE; | |
743 | int cg_cpu_usage_size = 0; | |
744 | ||
7b367480 CB |
745 | if (offset) { |
746 | int left; | |
747 | ||
1f5596dd CB |
748 | if (offset > d->size) |
749 | return -EINVAL; | |
7b367480 | 750 | |
1f5596dd CB |
751 | if (!d->cached) |
752 | return 0; | |
7b367480 CB |
753 | |
754 | left = d->size - offset; | |
755 | total_len = left > size ? size : left; | |
1f5596dd | 756 | memcpy(buf, d->buf + offset, total_len); |
7b367480 | 757 | |
1f5596dd CB |
758 | return total_len; |
759 | } | |
760 | ||
761 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
a9f0d623 | 762 | if (initpid <= 1 || is_shared_pidns(initpid)) |
1f5596dd CB |
763 | initpid = fc->pid; |
764 | ||
765 | /* | |
766 | * when container run with host pid namespace initpid == 1, cgroup will "/" | |
767 | * we should return host os's /proc contents. | |
768 | * in some case cpuacct_usage.all in "/" will larger then /proc/stat | |
769 | */ | |
605e157d | 770 | if (initpid == 1) |
6300e6c6 | 771 | return read_file_fuse("/proc/stat", buf, size, d); |
1f5596dd CB |
772 | |
773 | cg = get_pid_cgroup(initpid, "cpuset"); | |
1f5596dd CB |
774 | if (!cg) |
775 | return read_file_fuse("/proc/stat", buf, size, d); | |
776 | prune_init_slice(cg); | |
777 | ||
778 | cpuset = get_cpuset(cg); | |
779 | if (!cpuset) | |
780 | return 0; | |
781 | ||
77711d7a CB |
782 | f = fopen_cached("/proc/stat", "re", &fopen_cache); |
783 | if (!f) | |
784 | return 0; | |
785 | ||
19f42b8b CB |
786 | /* Skip first system cpu line. */ |
787 | if (getline(&line, &linelen, f) < 0) | |
788 | return log_error(0, "proc_stat_read read first line failed"); | |
789 | ||
1f5596dd CB |
790 | /* |
791 | * Read cpuacct.usage_all for all CPUs. | |
792 | * If the cpuacct cgroup is present, it is used to calculate the container's | |
793 | * CPU usage. If not, values from the host's /proc/stat are used. | |
794 | */ | |
f9434b9a CB |
795 | if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage, &cg_cpu_usage_size) == 0) { |
796 | if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) { | |
797 | total_len = cpuview_proc_stat(cg, cpuset, cg_cpu_usage, | |
798 | cg_cpu_usage_size, f, | |
799 | d->buf, d->buflen); | |
800 | goto out; | |
801 | } | |
802 | } else { | |
803 | lxcfs_v("proc_stat_read failed to read from cpuacct, falling back to the host's /proc/stat"); | |
804 | } | |
1f5596dd | 805 | |
1f5596dd CB |
806 | while (getline(&line, &linelen, f) != -1) { |
807 | ssize_t l; | |
808 | char cpu_char[10]; /* That's a lot of cores */ | |
809 | char *c; | |
810 | uint64_t all_used, cg_used, new_idle; | |
811 | int ret; | |
812 | ||
813 | if (strlen(line) == 0) | |
814 | continue; | |
815 | if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) { | |
816 | /* not a ^cpuN line containing a number N, just print it */ | |
817 | l = snprintf(cache, cache_size, "%s", line); | |
f75d5b75 CB |
818 | if (l < 0) |
819 | return log_error(0, "Failed to write cache"); | |
820 | if (l >= cache_size) | |
821 | return log_error(0, "Write to cache was truncated"); | |
822 | ||
1f5596dd CB |
823 | cache += l; |
824 | cache_size -= l; | |
825 | total_len += l; | |
f75d5b75 | 826 | |
1f5596dd CB |
827 | continue; |
828 | } | |
829 | ||
830 | if (sscanf(cpu_char, "%d", &physcpu) != 1) | |
831 | continue; | |
f75d5b75 | 832 | |
1f5596dd CB |
833 | if (!cpu_in_cpuset(physcpu, cpuset)) |
834 | continue; | |
f75d5b75 | 835 | |
2b8eff1d | 836 | curcpu++; |
1f5596dd CB |
837 | |
838 | ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", | |
839 | &user, | |
840 | &nice, | |
841 | &system, | |
842 | &idle, | |
843 | &iowait, | |
844 | &irq, | |
845 | &softirq, | |
846 | &steal, | |
847 | &guest, | |
848 | &guest_nice); | |
1f5596dd CB |
849 | if (ret != 10 || !cg_cpu_usage) { |
850 | c = strchr(line, ' '); | |
851 | if (!c) | |
852 | continue; | |
1f5596dd | 853 | |
f75d5b75 CB |
854 | l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c); |
855 | if (l < 0) | |
856 | return log_error(0, "Failed to write cache"); | |
857 | if (l >= cache_size) | |
858 | return log_error(0, "Write to cache was truncated"); | |
1f5596dd CB |
859 | |
860 | cache += l; | |
861 | cache_size -= l; | |
862 | total_len += l; | |
863 | ||
864 | if (ret != 10) | |
865 | continue; | |
866 | } | |
867 | ||
868 | if (cg_cpu_usage) { | |
869 | if (physcpu >= cg_cpu_usage_size) | |
870 | break; | |
871 | ||
872 | all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice; | |
873 | cg_used = cg_cpu_usage[physcpu].user + cg_cpu_usage[physcpu].system; | |
874 | ||
875 | if (all_used >= cg_used) { | |
876 | new_idle = idle + (all_used - cg_used); | |
877 | ||
878 | } else { | |
2b8eff1d CB |
879 | lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time", |
880 | curcpu, cg, all_used, cg_used); | |
1f5596dd CB |
881 | new_idle = idle; |
882 | } | |
883 | ||
2b8eff1d CB |
884 | l = snprintf(cache, cache_size, |
885 | "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n", | |
886 | curcpu, cg_cpu_usage[physcpu].user, | |
887 | cg_cpu_usage[physcpu].system, new_idle); | |
f75d5b75 CB |
888 | if (l < 0) |
889 | return log_error(0, "Failed to write cache"); | |
890 | if (l >= cache_size) | |
891 | return log_error(0, "Write to cache was truncated"); | |
1f5596dd CB |
892 | |
893 | cache += l; | |
894 | cache_size -= l; | |
895 | total_len += l; | |
896 | ||
897 | user_sum += cg_cpu_usage[physcpu].user; | |
898 | system_sum += cg_cpu_usage[physcpu].system; | |
899 | idle_sum += new_idle; | |
1f5596dd CB |
900 | } else { |
901 | user_sum += user; | |
902 | nice_sum += nice; | |
903 | system_sum += system; | |
904 | idle_sum += idle; | |
905 | iowait_sum += iowait; | |
906 | irq_sum += irq; | |
907 | softirq_sum += softirq; | |
908 | steal_sum += steal; | |
909 | guest_sum += guest; | |
910 | guest_nice_sum += guest_nice; | |
911 | } | |
912 | } | |
913 | ||
914 | cache = d->buf; | |
915 | ||
916 | int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", | |
917 | user_sum, | |
918 | nice_sum, | |
919 | system_sum, | |
920 | idle_sum, | |
921 | iowait_sum, | |
922 | irq_sum, | |
923 | softirq_sum, | |
924 | steal_sum, | |
925 | guest_sum, | |
926 | guest_nice_sum); | |
927 | if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE) { | |
928 | memcpy(cache, cpuall, cpuall_len); | |
929 | cache += cpuall_len; | |
930 | } else { | |
931 | /* shouldn't happen */ | |
f75d5b75 | 932 | lxcfs_error("proc_stat_read copy cpuall failed, cpuall_len=%d", cpuall_len); |
1f5596dd CB |
933 | cpuall_len = 0; |
934 | } | |
935 | ||
936 | memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len); | |
937 | total_len += cpuall_len; | |
938 | ||
939 | out: | |
940 | d->cached = 1; | |
941 | d->size = total_len; | |
942 | if (total_len > size) | |
943 | total_len = size; | |
944 | ||
945 | memcpy(buf, d->buf, total_len); | |
946 | return total_len; | |
947 | } | |
948 | ||
949 | /* Note that "memory.stat" in cgroup2 is hierarchical by default. */ | |
acff9786 | 950 | static bool cgroup_parse_memory_stat(const char *cgroup, struct memory_stat *mstat) |
1f5596dd | 951 | { |
39fc7fdc | 952 | __do_close int fd = -EBADF; |
acff9786 CB |
953 | __do_fclose FILE *f = NULL; |
954 | __do_free char *line = NULL; | |
28519477 | 955 | __do_free void *fdopen_cache = NULL; |
acff9786 CB |
956 | bool unified; |
957 | size_t len = 0; | |
958 | ssize_t linelen; | |
1f5596dd | 959 | |
acff9786 CB |
960 | fd = cgroup_ops->get_memory_stats_fd(cgroup_ops, cgroup); |
961 | if (fd < 0) | |
962 | return false; | |
963 | ||
28519477 | 964 | f = fdopen_cached(fd, "re", &fdopen_cache); |
acff9786 CB |
965 | if (!f) |
966 | return false; | |
acff9786 CB |
967 | |
968 | unified = pure_unified_layout(cgroup_ops); | |
969 | while ((linelen = getline(&line, &len, f)) != -1) { | |
970 | if (!unified && startswith(line, "hierarchical_memory_limit")) { | |
971 | sscanf(line, "hierarchical_memory_limit %" PRIu64, &(mstat->hierarchical_memory_limit)); | |
972 | } else if (!unified && startswith(line, "hierarchical_memsw_limit")) { | |
973 | sscanf(line, "hierarchical_memsw_limit %" PRIu64, &(mstat->hierarchical_memsw_limit)); | |
91d63a9e IM |
974 | } else if (startswith(line, unified ? "file" :"total_cache")) { |
975 | sscanf(line, unified ? "file %" PRIu64 : "total_cache %" PRIu64, &(mstat->total_cache)); | |
acff9786 CB |
976 | } else if (!unified && startswith(line, "total_rss")) { |
977 | sscanf(line, "total_rss %" PRIu64, &(mstat->total_rss)); | |
978 | } else if (!unified && startswith(line, "total_rss_huge")) { | |
979 | sscanf(line, "total_rss_huge %" PRIu64, &(mstat->total_rss_huge)); | |
980 | } else if (startswith(line, unified ? "shmem" : "total_shmem")) { | |
981 | sscanf(line, unified ? "shmem %" PRIu64 : "total_shmem %" PRIu64, &(mstat->total_shmem)); | |
982 | } else if (startswith(line, unified ? "file_mapped" : "total_mapped_file")) { | |
983 | sscanf(line, unified ? "file_mapped %" PRIu64 : "total_mapped_file %" PRIu64, &(mstat->total_mapped_file)); | |
984 | } else if (!unified && startswith(line, "total_dirty")) { | |
985 | sscanf(line, "total_dirty %" PRIu64, &(mstat->total_dirty)); | |
986 | } else if (!unified && startswith(line, "total_writeback")) { | |
987 | sscanf(line, "total_writeback %" PRIu64, &(mstat->total_writeback)); | |
988 | } else if (!unified && startswith(line, "total_swap")) { | |
989 | sscanf(line, "total_swap %" PRIu64, &(mstat->total_swap)); | |
990 | } else if (!unified && startswith(line, "total_pgpgin")) { | |
991 | sscanf(line, "total_pgpgin %" PRIu64, &(mstat->total_pgpgin)); | |
992 | } else if (!unified && startswith(line, "total_pgpgout")) { | |
993 | sscanf(line, "total_pgpgout %" PRIu64, &(mstat->total_pgpgout)); | |
994 | } else if (startswith(line, unified ? "pgfault" : "total_pgfault")) { | |
995 | sscanf(line, unified ? "pgfault %" PRIu64 : "total_pgfault %" PRIu64, &(mstat->total_pgfault)); | |
996 | } else if (startswith(line, unified ? "pgmajfault" : "total_pgmajfault")) { | |
997 | sscanf(line, unified ? "pgmajfault %" PRIu64 : "total_pgmajfault %" PRIu64, &(mstat->total_pgmajfault)); | |
998 | } else if (startswith(line, unified ? "inactive_anon" : "total_inactive_anon")) { | |
999 | sscanf(line, unified ? "inactive_anon %" PRIu64 : "total_inactive_anon %" PRIu64, &(mstat->total_inactive_anon)); | |
1000 | } else if (startswith(line, unified ? "active_anon" : "total_active_anon")) { | |
1001 | sscanf(line, unified ? "active_anon %" PRIu64 : "total_active_anon %" PRIu64, &(mstat->total_active_anon)); | |
1002 | } else if (startswith(line, unified ? "inactive_file" : "total_inactive_file")) { | |
1003 | sscanf(line, unified ? "inactive_file %" PRIu64 : "total_inactive_file %" PRIu64, &(mstat->total_inactive_file)); | |
1004 | } else if (startswith(line, unified ? "active_file" : "total_active_file")) { | |
1005 | sscanf(line, unified ? "active_file %" PRIu64 : "total_active_file %" PRIu64, &(mstat->total_active_file)); | |
1006 | } else if (startswith(line, unified ? "unevictable" : "total_unevictable")) { | |
1007 | sscanf(line, unified ? "unevictable %" PRIu64 : "total_unevictable %" PRIu64, &(mstat->total_unevictable)); | |
1f5596dd | 1008 | } |
1f5596dd | 1009 | } |
acff9786 CB |
1010 | |
1011 | return true; | |
1f5596dd CB |
1012 | } |
1013 | ||
1f5596dd CB |
1014 | static int proc_meminfo_read(char *buf, size_t size, off_t offset, |
1015 | struct fuse_file_info *fi) | |
1016 | { | |
9cb500bc | 1017 | __do_free char *cgroup = NULL, *line = NULL, *memusage_str = NULL, |
37f75e57 | 1018 | *memswusage_str = NULL; |
757a63e7 | 1019 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
1020 | __do_fclose FILE *f = NULL; |
1021 | struct fuse_context *fc = fuse_get_context(); | |
9973cc06 | 1022 | struct lxcfs_opts *opts = (struct lxcfs_opts *)fuse_get_context()->private_data; |
a6ab8435 | 1023 | bool wants_swap = opts && !opts->swap_off && liblxcfs_can_use_swap(), host_swap = false; |
99b183fb | 1024 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
acff9786 | 1025 | uint64_t memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0, |
37f75e57 | 1026 | hosttotal = 0, swfree = 0, swusage = 0, swtotal = 0; |
334a14f9 | 1027 | struct memory_stat mstat = {}; |
1f5596dd CB |
1028 | size_t linelen = 0, total_len = 0; |
1029 | char *cache = d->buf; | |
1030 | size_t cache_size = d->buflen; | |
1031 | int ret; | |
1032 | ||
1033 | if (offset) { | |
1034 | int left; | |
1035 | ||
1036 | if (offset > d->size) | |
1037 | return -EINVAL; | |
1038 | ||
1039 | if (!d->cached) | |
1040 | return 0; | |
1041 | ||
1042 | left = d->size - offset; | |
1043 | total_len = left > size ? size : left; | |
1044 | memcpy(buf, cache + offset, total_len); | |
1045 | ||
1046 | return total_len; | |
1047 | } | |
1048 | ||
1049 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
1050 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
1051 | initpid = fc->pid; | |
1052 | ||
1053 | cgroup = get_pid_cgroup(initpid, "memory"); | |
1054 | if (!cgroup) | |
1055 | return read_file_fuse("/proc/meminfo", buf, size, d); | |
1056 | ||
1057 | prune_init_slice(cgroup); | |
1058 | ||
37f75e57 | 1059 | /* memory limits */ |
1f5596dd CB |
1060 | ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str); |
1061 | if (ret < 0) | |
433f7337 | 1062 | return read_file_fuse("/proc/meminfo", buf, size, d); |
1f5596dd | 1063 | |
37f75e57 CB |
1064 | if (safe_uint64(memusage_str, &memusage, 10) < 0) |
1065 | lxcfs_error("Failed to convert memusage %s", memusage_str); | |
1066 | ||
acff9786 | 1067 | if (!cgroup_parse_memory_stat(cgroup, &mstat)) |
433f7337 | 1068 | return read_file_fuse("/proc/meminfo", buf, size, d); |
1f5596dd | 1069 | |
37f75e57 CB |
1070 | memlimit = get_min_memlimit(cgroup, false); |
1071 | ||
1f5596dd CB |
1072 | /* |
1073 | * Following values are allowed to fail, because swapaccount might be | |
1074 | * turned off for current kernel. | |
1075 | */ | |
5e3ca4d3 | 1076 | if (wants_swap) { |
37f75e57 CB |
1077 | memswlimit = get_min_memlimit(cgroup, true); |
1078 | if (memswlimit > 0) { | |
5e3ca4d3 | 1079 | ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str); |
37f75e57 CB |
1080 | if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) { |
1081 | if (memlimit > memswlimit) | |
1082 | swtotal = 0; | |
1083 | else | |
1084 | swtotal = (memswlimit - memlimit) / 1024; | |
1085 | if (memusage > memswusage || swtotal == 0) | |
1086 | swusage = 0; | |
1087 | else | |
1088 | swusage = (memswusage - memusage) / 1024; | |
8fe0082e | 1089 | } |
5e3ca4d3 | 1090 | } |
1f5596dd CB |
1091 | } |
1092 | ||
757a63e7 | 1093 | f = fopen_cached("/proc/meminfo", "re", &fopen_cache); |
1f5596dd | 1094 | if (!f) |
433f7337 | 1095 | return read_file_fuse("/proc/meminfo", buf, size, d); |
1f5596dd | 1096 | |
37f75e57 CB |
1097 | memusage /= 1024; |
1098 | memlimit /= 1024; | |
1f5596dd CB |
1099 | while (getline(&line, &linelen, f) != -1) { |
1100 | ssize_t l; | |
1101 | char *printme, lbuf[100]; | |
1102 | ||
1103 | memset(lbuf, 0, 100); | |
1104 | if (startswith(line, "MemTotal:")) { | |
acff9786 | 1105 | sscanf(line+sizeof("MemTotal:")-1, "%" PRIu64, &hosttotal); |
65fc6502 CB |
1106 | if (memlimit == 0) |
1107 | memlimit = hosttotal; | |
1108 | ||
1f5596dd CB |
1109 | if (hosttotal < memlimit) |
1110 | memlimit = hosttotal; | |
acff9786 | 1111 | snprintf(lbuf, 100, "MemTotal: %8" PRIu64 " kB\n", memlimit); |
1f5596dd CB |
1112 | printme = lbuf; |
1113 | } else if (startswith(line, "MemFree:")) { | |
acff9786 | 1114 | snprintf(lbuf, 100, "MemFree: %8" PRIu64 " kB\n", memlimit - memusage); |
1f5596dd CB |
1115 | printme = lbuf; |
1116 | } else if (startswith(line, "MemAvailable:")) { | |
acff9786 | 1117 | snprintf(lbuf, 100, "MemAvailable: %8" PRIu64 " kB\n", memlimit - memusage + mstat.total_cache / 1024); |
1f5596dd | 1118 | printme = lbuf; |
4b754ca6 | 1119 | } else if (startswith(line, "SwapTotal:")) { |
37f75e57 CB |
1120 | if (wants_swap) { |
1121 | uint64_t hostswtotal = 0; | |
1122 | ||
1123 | sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal); | |
1124 | ||
1125 | if (hostswtotal < swtotal) { | |
1126 | swtotal = hostswtotal; | |
1127 | host_swap = true; | |
1128 | } | |
1129 | } | |
1130 | ||
8fe0082e | 1131 | snprintf(lbuf, 100, "SwapTotal: %8" PRIu64 " kB\n", swtotal); |
1f5596dd | 1132 | printme = lbuf; |
4b754ca6 | 1133 | } else if (startswith(line, "SwapFree:")) { |
37f75e57 CB |
1134 | if (wants_swap) { |
1135 | uint64_t hostswfree = 0; | |
8fe0082e | 1136 | |
37f75e57 CB |
1137 | if (host_swap) { |
1138 | sscanf(line + STRLITERALLEN("SwapFree:"), "%" PRIu64, &hostswfree); | |
1139 | swfree = hostswfree; | |
1140 | } else if (swtotal >= swusage) { | |
1141 | swfree = swtotal - swusage; | |
1142 | } | |
4b754ca6 | 1143 | } |
37f75e57 | 1144 | |
8fe0082e | 1145 | snprintf(lbuf, 100, "SwapFree: %8" PRIu64 " kB\n", swfree); |
1f5596dd CB |
1146 | printme = lbuf; |
1147 | } else if (startswith(line, "Slab:")) { | |
acff9786 | 1148 | snprintf(lbuf, 100, "Slab: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1149 | printme = lbuf; |
1150 | } else if (startswith(line, "Buffers:")) { | |
acff9786 | 1151 | snprintf(lbuf, 100, "Buffers: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1152 | printme = lbuf; |
1153 | } else if (startswith(line, "Cached:")) { | |
acff9786 CB |
1154 | snprintf(lbuf, 100, "Cached: %8" PRIu64 " kB\n", |
1155 | mstat.total_cache / 1024); | |
1f5596dd CB |
1156 | printme = lbuf; |
1157 | } else if (startswith(line, "SwapCached:")) { | |
acff9786 | 1158 | snprintf(lbuf, 100, "SwapCached: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1159 | printme = lbuf; |
1160 | } else if (startswith(line, "Active:")) { | |
acff9786 CB |
1161 | snprintf(lbuf, 100, "Active: %8" PRIu64 " kB\n", |
1162 | (mstat.total_active_anon + | |
1163 | mstat.total_active_file) / | |
1164 | 1024); | |
1f5596dd CB |
1165 | printme = lbuf; |
1166 | } else if (startswith(line, "Inactive:")) { | |
acff9786 CB |
1167 | snprintf(lbuf, 100, "Inactive: %8" PRIu64 " kB\n", |
1168 | (mstat.total_inactive_anon + | |
1169 | mstat.total_inactive_file) / | |
1170 | 1024); | |
1f5596dd | 1171 | printme = lbuf; |
233818fd | 1172 | } else if (startswith(line, "Active(anon):")) { |
acff9786 CB |
1173 | snprintf(lbuf, 100, "Active(anon): %8" PRIu64 " kB\n", |
1174 | mstat.total_active_anon / 1024); | |
1f5596dd | 1175 | printme = lbuf; |
233818fd | 1176 | } else if (startswith(line, "Inactive(anon):")) { |
acff9786 CB |
1177 | snprintf(lbuf, 100, "Inactive(anon): %8" PRIu64 " kB\n", |
1178 | mstat.total_inactive_anon / 1024); | |
1f5596dd | 1179 | printme = lbuf; |
233818fd | 1180 | } else if (startswith(line, "Active(file):")) { |
acff9786 CB |
1181 | snprintf(lbuf, 100, "Active(file): %8" PRIu64 " kB\n", |
1182 | mstat.total_active_file / 1024); | |
1f5596dd | 1183 | printme = lbuf; |
233818fd | 1184 | } else if (startswith(line, "Inactive(file):")) { |
acff9786 CB |
1185 | snprintf(lbuf, 100, "Inactive(file): %8" PRIu64 " kB\n", |
1186 | mstat.total_inactive_file / 1024); | |
1f5596dd | 1187 | printme = lbuf; |
233818fd | 1188 | } else if (startswith(line, "Unevictable:")) { |
acff9786 CB |
1189 | snprintf(lbuf, 100, "Unevictable: %8" PRIu64 " kB\n", |
1190 | mstat.total_unevictable / 1024); | |
1191 | printme = lbuf; | |
233818fd | 1192 | } else if (startswith(line, "Dirty:")) { |
acff9786 CB |
1193 | snprintf(lbuf, 100, "Dirty: %8" PRIu64 " kB\n", |
1194 | mstat.total_dirty / 1024); | |
1195 | printme = lbuf; | |
233818fd | 1196 | } else if (startswith(line, "Writeback:")) { |
acff9786 CB |
1197 | snprintf(lbuf, 100, "Writeback: %8" PRIu64 " kB\n", |
1198 | mstat.total_writeback / 1024); | |
1199 | printme = lbuf; | |
233818fd | 1200 | } else if (startswith(line, "AnonPages:")) { |
acff9786 CB |
1201 | snprintf(lbuf, 100, "AnonPages: %8" PRIu64 " kB\n", |
1202 | (mstat.total_active_anon + | |
1203 | mstat.total_inactive_anon - mstat.total_shmem) / | |
1204 | 1024); | |
1205 | printme = lbuf; | |
233818fd | 1206 | } else if (startswith(line, "Mapped:")) { |
acff9786 CB |
1207 | snprintf(lbuf, 100, "Mapped: %8" PRIu64 " kB\n", |
1208 | mstat.total_mapped_file / 1024); | |
1f5596dd | 1209 | printme = lbuf; |
233818fd | 1210 | } else if (startswith(line, "SReclaimable:")) { |
acff9786 | 1211 | snprintf(lbuf, 100, "SReclaimable: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd | 1212 | printme = lbuf; |
233818fd | 1213 | } else if (startswith(line, "SUnreclaim:")) { |
acff9786 | 1214 | snprintf(lbuf, 100, "SUnreclaim: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1215 | printme = lbuf; |
1216 | } else if (startswith(line, "Shmem:")) { | |
acff9786 CB |
1217 | snprintf(lbuf, 100, "Shmem: %8" PRIu64 " kB\n", |
1218 | mstat.total_shmem / 1024); | |
1f5596dd | 1219 | printme = lbuf; |
233818fd | 1220 | } else if (startswith(line, "ShmemHugePages:")) { |
acff9786 | 1221 | snprintf(lbuf, 100, "ShmemHugePages: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd | 1222 | printme = lbuf; |
233818fd | 1223 | } else if (startswith(line, "ShmemPmdMapped:")) { |
acff9786 CB |
1224 | snprintf(lbuf, 100, "ShmemPmdMapped: %8" PRIu64 " kB\n", (uint64_t)0); |
1225 | printme = lbuf; | |
233818fd | 1226 | } else if (startswith(line, "AnonHugePages:")) { |
acff9786 CB |
1227 | snprintf(lbuf, 100, "AnonHugePages: %8" PRIu64 " kB\n", |
1228 | mstat.total_rss_huge / 1024); | |
1f5596dd | 1229 | printme = lbuf; |
acff9786 CB |
1230 | } else { |
1231 | printme = line; | |
1232 | } | |
1f5596dd CB |
1233 | |
1234 | l = snprintf(cache, cache_size, "%s", printme); | |
f75d5b75 CB |
1235 | if (l < 0) |
1236 | return log_error(0, "Failed to write cache"); | |
1237 | if (l >= cache_size) | |
1238 | return log_error(0, "Write to cache was truncated"); | |
1f5596dd CB |
1239 | |
1240 | cache += l; | |
1241 | cache_size -= l; | |
1242 | total_len += l; | |
1243 | } | |
1244 | ||
1245 | d->cached = 1; | |
1246 | d->size = total_len; | |
f75d5b75 CB |
1247 | if (total_len > size) |
1248 | total_len = size; | |
1f5596dd CB |
1249 | memcpy(buf, d->buf, total_len); |
1250 | ||
1251 | return total_len; | |
1252 | } | |
1253 | ||
0d894438 CB |
1254 | __lxcfs_fuse_ops int proc_read(const char *path, char *buf, size_t size, |
1255 | off_t offset, struct fuse_file_info *fi) | |
1f5596dd | 1256 | { |
99b183fb | 1257 | struct file_info *f = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
1258 | |
1259 | switch (f->type) { | |
1260 | case LXC_TYPE_PROC_MEMINFO: | |
287555b8 CB |
1261 | if (liblxcfs_functional()) |
1262 | return proc_meminfo_read(buf, size, offset, fi); | |
1263 | ||
1264 | return read_file_fuse_with_offset(LXC_TYPE_PROC_MEMINFO_PATH, | |
1265 | buf, size, offset, f); | |
1f5596dd | 1266 | case LXC_TYPE_PROC_CPUINFO: |
287555b8 CB |
1267 | if (liblxcfs_functional()) |
1268 | return proc_cpuinfo_read(buf, size, offset, fi); | |
1269 | ||
1270 | return read_file_fuse_with_offset(LXC_TYPE_PROC_CPUINFO_PATH, | |
1271 | buf, size, offset, f); | |
1f5596dd | 1272 | case LXC_TYPE_PROC_UPTIME: |
287555b8 CB |
1273 | if (liblxcfs_functional()) |
1274 | return proc_uptime_read(buf, size, offset, fi); | |
1275 | ||
1276 | return read_file_fuse_with_offset(LXC_TYPE_PROC_UPTIME_PATH, | |
1277 | buf, size, offset, f); | |
1f5596dd | 1278 | case LXC_TYPE_PROC_STAT: |
287555b8 CB |
1279 | if (liblxcfs_functional()) |
1280 | return proc_stat_read(buf, size, offset, fi); | |
1281 | ||
1282 | return read_file_fuse_with_offset(LXC_TYPE_PROC_STAT_PATH, buf, | |
1283 | size, offset, f); | |
1f5596dd | 1284 | case LXC_TYPE_PROC_DISKSTATS: |
287555b8 CB |
1285 | if (liblxcfs_functional()) |
1286 | return proc_diskstats_read(buf, size, offset, fi); | |
1287 | ||
1288 | return read_file_fuse_with_offset(LXC_TYPE_PROC_DISKSTATS_PATH, | |
1289 | buf, size, offset, f); | |
1f5596dd | 1290 | case LXC_TYPE_PROC_SWAPS: |
287555b8 CB |
1291 | if (liblxcfs_functional()) |
1292 | return proc_swaps_read(buf, size, offset, fi); | |
1293 | ||
1294 | return read_file_fuse_with_offset(LXC_TYPE_PROC_SWAPS_PATH, buf, | |
1295 | size, offset, f); | |
1f5596dd | 1296 | case LXC_TYPE_PROC_LOADAVG: |
287555b8 CB |
1297 | if (liblxcfs_functional()) |
1298 | return proc_loadavg_read(buf, size, offset, fi); | |
1299 | ||
1300 | return read_file_fuse_with_offset(LXC_TYPE_PROC_LOADAVG_PATH, | |
1301 | buf, size, offset, f); | |
1f5596dd | 1302 | } |
99b183fb CB |
1303 | |
1304 | return -EINVAL; | |
1f5596dd | 1305 | } |