]>
Commit | Line | Data |
---|---|---|
1f5596dd CB |
1 | /* SPDX-License-Identifier: LGPL-2.1-or-later */ |
2 | ||
3 | #ifndef _GNU_SOURCE | |
4 | #define _GNU_SOURCE | |
5 | #endif | |
6 | ||
7 | #ifndef FUSE_USE_VERSION | |
8 | #define FUSE_USE_VERSION 26 | |
9 | #endif | |
10 | ||
11 | #define _FILE_OFFSET_BITS 64 | |
12 | ||
13 | #define __STDC_FORMAT_MACROS | |
14 | #include <dirent.h> | |
15 | #include <errno.h> | |
16 | #include <fcntl.h> | |
17 | #include <fuse.h> | |
18 | #include <inttypes.h> | |
19 | #include <libgen.h> | |
20 | #include <pthread.h> | |
21 | #include <sched.h> | |
22 | #include <stdarg.h> | |
23 | #include <stdbool.h> | |
24 | #include <stdint.h> | |
25 | #include <stdio.h> | |
26 | #include <stdlib.h> | |
27 | #include <string.h> | |
28 | #include <time.h> | |
29 | #include <unistd.h> | |
30 | #include <wait.h> | |
31 | #include <linux/magic.h> | |
32 | #include <linux/sched.h> | |
33 | #include <sys/epoll.h> | |
34 | #include <sys/mman.h> | |
35 | #include <sys/mount.h> | |
36 | #include <sys/param.h> | |
37 | #include <sys/socket.h> | |
38 | #include <sys/syscall.h> | |
39 | #include <sys/sysinfo.h> | |
40 | #include <sys/vfs.h> | |
41 | ||
42 | #include "bindings.h" | |
43 | #include "config.h" | |
44 | #include "cgroup_fuse.h" | |
45 | #include "cgroups/cgroup.h" | |
46 | #include "cgroups/cgroup_utils.h" | |
4ec5c9da | 47 | #include "cpuset_parse.h" |
1f5596dd CB |
48 | #include "memory_utils.h" |
49 | #include "proc_loadavg.h" | |
4ec5c9da | 50 | #include "proc_cpuview.h" |
1f5596dd CB |
51 | #include "utils.h" |
52 | ||
acff9786 CB |
/*
 * Memory counters of a cgroup, one 64-bit field per counter.
 *
 * A pointer to this struct is taken by cgroup_parse_memory_stat().
 * NOTE(review): the field names presumably mirror the keys of the cgroup
 * "memory.stat" file, and the units (bytes vs. event counts) are not visible
 * in this part of the file - confirm against the parser.
 */
struct memory_stat {
	uint64_t hierarchical_memory_limit;
	uint64_t hierarchical_memsw_limit;
	uint64_t total_cache;
	uint64_t total_rss;
	uint64_t total_rss_huge;
	uint64_t total_shmem;
	uint64_t total_mapped_file;
	uint64_t total_dirty;
	uint64_t total_writeback;
	uint64_t total_swap;
	uint64_t total_pgpgin;
	uint64_t total_pgpgout;
	uint64_t total_pgfault;
	uint64_t total_pgmajfault;
	uint64_t total_inactive_anon;
	uint64_t total_active_anon;
	uint64_t total_inactive_file;
	uint64_t total_active_file;
	uint64_t total_unevictable;
};
74 | ||
1f5596dd CB |
/*
 * Fill @sb with the attributes of an entry of the emulated /proc tree.
 *
 * "/proc" is reported as a read/execute-only directory, every emulated file
 * as a read-only regular file of size 0. All timestamps are set to the
 * current wall-clock time and ownership to root.
 *
 * Returns 0 on success, -EINVAL if the clock cannot be read and -ENOENT for
 * any path that is not emulated.
 */
int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const emulated_files[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
		"/proc/swaps",
		"/proc/loadavg",
	};
	struct timespec ts;
	size_t i;

	memset(sb, 0, sizeof(*sb));

	if (clock_gettime(CLOCK_REALTIME, &ts) < 0)
		return -EINVAL;

	sb->st_uid = 0;
	sb->st_gid = 0;
	sb->st_atim = ts;
	sb->st_mtim = ts;
	sb->st_ctim = ts;

	if (strcmp(path, "/proc") == 0) {
		sb->st_mode = S_IFDIR | 00555;
		sb->st_nlink = 2;
		return 0;
	}

	for (i = 0; i < sizeof(emulated_files) / sizeof(emulated_files[0]); i++) {
		if (strcmp(path, emulated_files[i]) != 0)
			continue;

		sb->st_size = 0;
		sb->st_mode = S_IFREG | 00444;
		sb->st_nlink = 1;
		return 0;
	}

	return -ENOENT;
}
104 | ||
105 | int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, | |
106 | off_t offset, struct fuse_file_info *fi) | |
107 | { | |
108 | if (filler(buf, ".", NULL, 0) != 0 || | |
109 | filler(buf, "..", NULL, 0) != 0 || | |
110 | filler(buf, "cpuinfo", NULL, 0) != 0 || | |
111 | filler(buf, "meminfo", NULL, 0) != 0 || | |
112 | filler(buf, "stat", NULL, 0) != 0 || | |
113 | filler(buf, "uptime", NULL, 0) != 0 || | |
114 | filler(buf, "diskstats", NULL, 0) != 0 || | |
115 | filler(buf, "swaps", NULL, 0) != 0 || | |
116 | filler(buf, "loadavg", NULL, 0) != 0) | |
117 | return -EINVAL; | |
118 | ||
119 | return 0; | |
120 | } | |
121 | ||
/*
 * Return the number of bytes obtained by reading @which line by line until
 * getline() fails, or 0 if the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char *linebuf = NULL;
	size_t capacity = 0;
	ssize_t total = 0;
	FILE *fp;

	fp = fopen(which, "re");
	if (!fp)
		return 0;

	for (;;) {
		ssize_t n = getline(&linebuf, &capacity, fp);

		if (n == -1)
			break;
		total += n;
	}

	fclose(fp);
	free(linebuf);

	return total;
}
138 | ||
139 | int proc_open(const char *path, struct fuse_file_info *fi) | |
140 | { | |
700dd417 | 141 | __do_free struct file_info *info = NULL; |
1f5596dd | 142 | int type = -1; |
1f5596dd CB |
143 | |
144 | if (strcmp(path, "/proc/meminfo") == 0) | |
145 | type = LXC_TYPE_PROC_MEMINFO; | |
146 | else if (strcmp(path, "/proc/cpuinfo") == 0) | |
147 | type = LXC_TYPE_PROC_CPUINFO; | |
148 | else if (strcmp(path, "/proc/uptime") == 0) | |
149 | type = LXC_TYPE_PROC_UPTIME; | |
150 | else if (strcmp(path, "/proc/stat") == 0) | |
151 | type = LXC_TYPE_PROC_STAT; | |
152 | else if (strcmp(path, "/proc/diskstats") == 0) | |
153 | type = LXC_TYPE_PROC_DISKSTATS; | |
154 | else if (strcmp(path, "/proc/swaps") == 0) | |
155 | type = LXC_TYPE_PROC_SWAPS; | |
156 | else if (strcmp(path, "/proc/loadavg") == 0) | |
157 | type = LXC_TYPE_PROC_LOADAVG; | |
158 | if (type == -1) | |
159 | return -ENOENT; | |
160 | ||
161 | info = malloc(sizeof(*info)); | |
162 | if (!info) | |
163 | return -ENOMEM; | |
164 | ||
165 | memset(info, 0, sizeof(*info)); | |
166 | info->type = type; | |
167 | ||
168 | info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE; | |
169 | ||
170 | info->buf = malloc(info->buflen); | |
171 | if (!info->buf) | |
172 | return -ENOMEM; | |
173 | ||
174 | memset(info->buf, 0, info->buflen); | |
175 | /* set actual size to buffer size */ | |
176 | info->size = info->buflen; | |
177 | ||
700dd417 | 178 | fi->fh = PTR_TO_UINT64(move_ptr(info)); |
1f5596dd CB |
179 | return 0; |
180 | } | |
181 | ||
/*
 * Access check for the emulated /proc tree.
 *
 * The "/proc" directory itself is granted whenever the real directory is
 * readable. Everything else is read-only: any request for more than R_OK
 * yields -EACCES, otherwise 0.
 */
int proc_access(const char *path, int mask)
{
	bool is_proc_dir = strcmp(path, "/proc") == 0;

	if (is_proc_dir && access(path, R_OK) == 0)
		return 0;

	/* these are all read-only */
	return (mask & ~R_OK) ? -EACCES : 0;
}
192 | ||
/*
 * Release an emulated /proc file: hands @fi to do_release_file_info() and
 * always reports success. @path is not used.
 */
int proc_release(const char *path, struct fuse_file_info *fi)
{
	(void)path;

	do_release_file_info(fi);

	return 0;
}
198 | ||
199 | static unsigned long get_memlimit(const char *cgroup, bool swap) | |
200 | { | |
201 | int ret; | |
202 | __do_free char *memlimit_str = NULL; | |
203 | unsigned long memlimit = -1; | |
204 | ||
205 | if (swap) | |
206 | ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cgroup, &memlimit_str); | |
207 | else | |
208 | ret = cgroup_ops->get_memory_max(cgroup_ops, cgroup, &memlimit_str); | |
209 | if (ret > 0) | |
210 | memlimit = strtoul(memlimit_str, NULL, 10); | |
211 | ||
212 | return memlimit; | |
213 | } | |
214 | ||
215 | static unsigned long get_min_memlimit(const char *cgroup, bool swap) | |
216 | { | |
217 | __do_free char *copy = NULL; | |
218 | unsigned long memlimit = 0; | |
219 | unsigned long retlimit; | |
220 | ||
221 | copy = strdup(cgroup); | |
222 | retlimit = get_memlimit(copy, swap); | |
223 | ||
224 | while (strcmp(copy, "/") != 0) { | |
225 | char *it = copy; | |
226 | ||
227 | it = dirname(it); | |
228 | memlimit = get_memlimit(it, swap); | |
229 | if (memlimit != -1 && memlimit < retlimit) | |
230 | retlimit = memlimit; | |
231 | }; | |
232 | ||
233 | return retlimit; | |
234 | } | |
235 | ||
4ec5c9da CB |
/* Return true if @line begins with the string @pref. */
static bool startswith(const char *line, const char *pref)
{
	size_t preflen = strlen(pref);

	return strncmp(line, pref, preflen) == 0;
}
242 | ||
1f5596dd CB |
243 | static int proc_swaps_read(char *buf, size_t size, off_t offset, |
244 | struct fuse_file_info *fi) | |
245 | { | |
246 | __do_free char *cg = NULL, *memswlimit_str = NULL, *memusage_str = NULL, | |
247 | *memswusage_str = NULL; | |
248 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 249 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
250 | unsigned long memswlimit = 0, memlimit = 0, memusage = 0, |
251 | memswusage = 0, swap_total = 0, swap_free = 0; | |
252 | ssize_t total_len = 0; | |
253 | ssize_t l = 0; | |
254 | char *cache = d->buf; | |
255 | int ret; | |
256 | ||
257 | if (offset) { | |
258 | int left; | |
259 | ||
260 | if (offset > d->size) | |
261 | return -EINVAL; | |
262 | ||
263 | if (!d->cached) | |
264 | return 0; | |
265 | ||
266 | left = d->size - offset; | |
267 | total_len = left > size ? size: left; | |
268 | memcpy(buf, cache + offset, total_len); | |
269 | ||
270 | return total_len; | |
271 | } | |
272 | ||
273 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
274 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
275 | initpid = fc->pid; | |
276 | cg = get_pid_cgroup(initpid, "memory"); | |
277 | if (!cg) | |
278 | return read_file_fuse("/proc/swaps", buf, size, d); | |
279 | prune_init_slice(cg); | |
280 | ||
281 | memlimit = get_min_memlimit(cg, false); | |
282 | ||
283 | ret = cgroup_ops->get_memory_current(cgroup_ops, cg, &memusage_str); | |
284 | if (ret < 0) | |
285 | return 0; | |
286 | ||
287 | memusage = strtoul(memusage_str, NULL, 10); | |
288 | ||
289 | ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cg, &memswlimit_str); | |
290 | if (ret >= 0) | |
291 | ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cg, &memswusage_str); | |
292 | if (ret >= 0) { | |
293 | memswlimit = get_min_memlimit(cg, true); | |
294 | memswusage = strtoul(memswusage_str, NULL, 10); | |
295 | swap_total = (memswlimit - memlimit) / 1024; | |
296 | swap_free = (memswusage - memusage) / 1024; | |
297 | } | |
298 | ||
299 | total_len = snprintf(d->buf, d->size, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); | |
300 | ||
301 | /* When no mem + swap limit is specified or swapaccount=0*/ | |
302 | if (!memswlimit) { | |
303 | __do_free char *line = NULL; | |
757a63e7 | 304 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
305 | __do_fclose FILE *f = NULL; |
306 | size_t linelen = 0; | |
307 | ||
757a63e7 | 308 | f = fopen_cached("/proc/meminfo", "re", &fopen_cache); |
1f5596dd CB |
309 | if (!f) |
310 | return 0; | |
311 | ||
312 | while (getline(&line, &linelen, f) != -1) { | |
313 | if (startswith(line, "SwapTotal:")) | |
314 | sscanf(line, "SwapTotal: %8lu kB", &swap_total); | |
315 | else if (startswith(line, "SwapFree:")) | |
316 | sscanf(line, "SwapFree: %8lu kB", &swap_free); | |
317 | } | |
318 | } | |
319 | ||
320 | if (swap_total > 0) { | |
321 | l = snprintf(d->buf + total_len, d->size - total_len, | |
322 | "none%*svirtual\t\t%lu\t%lu\t0\n", 36, " ", | |
323 | swap_total, swap_free); | |
324 | total_len += l; | |
325 | } | |
326 | ||
327 | if (total_len < 0 || l < 0) { | |
328 | perror("Error writing to cache"); | |
329 | return 0; | |
330 | } | |
331 | ||
332 | d->cached = 1; | |
333 | d->size = (int)total_len; | |
334 | ||
335 | if (total_len > size) total_len = size; | |
336 | memcpy(buf, d->buf, total_len); | |
337 | return total_len; | |
338 | } | |
339 | ||
/*
 * Look up one counter in a multi-line blkio statistics string.
 *
 * @str holds newline separated records; the record that begins with
 * "<major>:<minor> <iotype>" is located and the number following that key is
 * stored in @v. @v is set to 0 when @str is NULL (a caller may have failed
 * to obtain the statistics), when no record matches, or when the key does
 * not fit the internal buffer.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor,
			       char *iotype, unsigned long *v)
{
	char key[32];
	size_t len;
	int n;

	*v = 0;

	if (!str)
		return;

	n = snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	if (n < 0 || (size_t)n >= sizeof(key))
		return; /* a truncated key cannot identify a record */
	len = (size_t)n;

	while (*str) {
		char *eol;

		if (strncmp(str, key, len) == 0) {
			sscanf(str + len, "%lu", v);
			return;
		}

		eol = strchr(str, '\n');
		if (!eol)
			return;
		str = eol + 1;
	}
}
363 | ||
364 | static int proc_diskstats_read(char *buf, size_t size, off_t offset, | |
365 | struct fuse_file_info *fi) | |
366 | { | |
367 | __do_free char *cg = NULL, *io_serviced_str = NULL, | |
368 | *io_merged_str = NULL, *io_service_bytes_str = NULL, | |
369 | *io_wait_time_str = NULL, *io_service_time_str = NULL, | |
370 | *line = NULL; | |
757a63e7 | 371 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
372 | __do_fclose FILE *f = NULL; |
373 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 374 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
375 | unsigned long read = 0, write = 0; |
376 | unsigned long read_merged = 0, write_merged = 0; | |
377 | unsigned long read_sectors = 0, write_sectors = 0; | |
378 | unsigned long read_ticks = 0, write_ticks = 0; | |
379 | unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0; | |
380 | unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0; | |
381 | char *cache = d->buf; | |
382 | size_t cache_size = d->buflen; | |
383 | size_t linelen = 0, total_len = 0; | |
384 | unsigned int major = 0, minor = 0; | |
385 | int i = 0; | |
386 | int ret; | |
387 | char dev_name[72]; | |
388 | ||
389 | if (offset){ | |
390 | int left; | |
391 | ||
392 | if (offset > d->size) | |
393 | return -EINVAL; | |
394 | ||
395 | if (!d->cached) | |
396 | return 0; | |
397 | ||
398 | left = d->size - offset; | |
399 | total_len = left > size ? size: left; | |
400 | memcpy(buf, cache + offset, total_len); | |
401 | ||
402 | return total_len; | |
403 | } | |
404 | ||
405 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
406 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
407 | initpid = fc->pid; | |
408 | cg = get_pid_cgroup(initpid, "blkio"); | |
409 | if (!cg) | |
410 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
411 | prune_init_slice(cg); | |
412 | ||
413 | ret = cgroup_ops->get_io_serviced(cgroup_ops, cg, &io_serviced_str); | |
414 | if (ret < 0) { | |
415 | if (ret == -EOPNOTSUPP) | |
416 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
417 | } | |
418 | ||
419 | ret = cgroup_ops->get_io_merged(cgroup_ops, cg, &io_merged_str); | |
420 | if (ret < 0) { | |
421 | if (ret == -EOPNOTSUPP) | |
422 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
423 | } | |
424 | ||
425 | ret = cgroup_ops->get_io_service_bytes(cgroup_ops, cg, &io_service_bytes_str); | |
426 | if (ret < 0) { | |
427 | if (ret == -EOPNOTSUPP) | |
428 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
429 | } | |
430 | ||
431 | ret = cgroup_ops->get_io_wait_time(cgroup_ops, cg, &io_wait_time_str); | |
432 | if (ret < 0) { | |
433 | if (ret == -EOPNOTSUPP) | |
434 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
435 | } | |
436 | ||
437 | ret = cgroup_ops->get_io_service_time(cgroup_ops, cg, &io_service_time_str); | |
438 | if (ret < 0) { | |
439 | if (ret == -EOPNOTSUPP) | |
440 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
441 | } | |
442 | ||
757a63e7 | 443 | f = fopen_cached("/proc/diskstats", "re", &fopen_cache); |
1f5596dd CB |
444 | if (!f) |
445 | return 0; | |
446 | ||
447 | while (getline(&line, &linelen, f) != -1) { | |
448 | ssize_t l; | |
449 | char lbuf[256]; | |
450 | ||
451 | i = sscanf(line, "%u %u %71s", &major, &minor, dev_name); | |
452 | if (i != 3) | |
453 | continue; | |
454 | ||
455 | get_blkio_io_value(io_serviced_str, major, minor, "Read", &read); | |
456 | get_blkio_io_value(io_serviced_str, major, minor, "Write", &write); | |
457 | get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged); | |
458 | get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged); | |
459 | get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors); | |
460 | read_sectors = read_sectors/512; | |
461 | get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors); | |
462 | write_sectors = write_sectors/512; | |
463 | ||
464 | get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm); | |
465 | rd_svctm = rd_svctm/1000000; | |
466 | get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait); | |
467 | rd_wait = rd_wait/1000000; | |
468 | read_ticks = rd_svctm + rd_wait; | |
469 | ||
470 | get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm); | |
471 | wr_svctm = wr_svctm/1000000; | |
472 | get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait); | |
473 | wr_wait = wr_wait/1000000; | |
474 | write_ticks = wr_svctm + wr_wait; | |
475 | ||
476 | get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks); | |
477 | tot_ticks = tot_ticks/1000000; | |
478 | ||
479 | memset(lbuf, 0, 256); | |
480 | if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) | |
481 | snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", | |
482 | major, minor, dev_name, read, read_merged, read_sectors, read_ticks, | |
483 | write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks); | |
484 | else | |
485 | continue; | |
486 | ||
487 | l = snprintf(cache, cache_size, "%s", lbuf); | |
488 | if (l < 0) { | |
489 | perror("Error writing to fuse buf"); | |
490 | return 0; | |
491 | } | |
492 | if (l >= cache_size) { | |
493 | lxcfs_error("%s\n", "Internal error: truncated write to cache."); | |
494 | return 0; | |
495 | } | |
496 | cache += l; | |
497 | cache_size -= l; | |
498 | total_len += l; | |
499 | } | |
500 | ||
501 | d->cached = 1; | |
502 | d->size = total_len; | |
503 | if (total_len > size ) total_len = size; | |
504 | memcpy(buf, d->buf, total_len); | |
505 | ||
506 | return total_len; | |
507 | } | |
508 | ||
#if RELOADTEST
/*
 * Leave a marker file behind so the reload test can tell this code ran.
 * Best effort: failure (e.g. the marker already exists) is ignored.
 *
 * mknod() takes the file type and the permission bits together in its mode
 * argument; the device number is irrelevant for a regular file.
 */
static inline void iwashere(void)
{
	(void)mknod("/tmp/lxcfs-iwashere", S_IFREG | 0644, 0);
}
#endif
515 | ||
516 | /* This function retrieves the busy time of a group of tasks by looking at | |
517 | * cpuacct.usage. Unfortunately, this only makes sense when the container has | |
518 | * been given it's own cpuacct cgroup. If not, this function will take the busy | |
519 | * time of all other taks that do not actually belong to the container into | |
520 | * account as well. If someone has a clever solution for this please send a | |
521 | * patch! | |
522 | */ | |
523 | static double get_reaper_busy(pid_t task) | |
524 | { | |
525 | __do_free char *cgroup = NULL, *usage_str = NULL; | |
526 | unsigned long usage = 0; | |
527 | pid_t initpid; | |
528 | ||
529 | initpid = lookup_initpid_in_store(task); | |
530 | if (initpid <= 0) | |
531 | return 0; | |
532 | ||
533 | cgroup = get_pid_cgroup(initpid, "cpuacct"); | |
534 | if (!cgroup) | |
535 | return 0; | |
536 | prune_init_slice(cgroup); | |
537 | if (!cgroup_ops->get(cgroup_ops, "cpuacct", cgroup, "cpuacct.usage", | |
538 | &usage_str)) | |
539 | return 0; | |
540 | ||
541 | usage = strtoul(usage_str, NULL, 10); | |
542 | return ((double)usage / 1000000000); | |
543 | } | |
544 | ||
545 | static uint64_t get_reaper_start_time(pid_t pid) | |
546 | { | |
757a63e7 | 547 | __do_free void *fopen_cache = NULL; |
12a60884 | 548 | __do_fclose FILE *f = NULL; |
1f5596dd | 549 | int ret; |
1f5596dd CB |
550 | uint64_t starttime; |
551 | /* strlen("/proc/") = 6 | |
552 | * + | |
553 | * LXCFS_NUMSTRLEN64 | |
554 | * + | |
555 | * strlen("/stat") = 5 | |
556 | * + | |
557 | * \0 = 1 | |
558 | * */ | |
559 | #define __PROC_PID_STAT_LEN (6 + LXCFS_NUMSTRLEN64 + 5 + 1) | |
560 | char path[__PROC_PID_STAT_LEN]; | |
561 | pid_t qpid; | |
562 | ||
563 | qpid = lookup_initpid_in_store(pid); | |
564 | if (qpid <= 0) { | |
565 | /* Caller can check for EINVAL on 0. */ | |
566 | errno = EINVAL; | |
567 | return 0; | |
568 | } | |
569 | ||
570 | ret = snprintf(path, __PROC_PID_STAT_LEN, "/proc/%d/stat", qpid); | |
571 | if (ret < 0 || ret >= __PROC_PID_STAT_LEN) { | |
572 | /* Caller can check for EINVAL on 0. */ | |
573 | errno = EINVAL; | |
574 | return 0; | |
575 | } | |
576 | ||
757a63e7 | 577 | f = fopen_cached(path, "re", &fopen_cache); |
1f5596dd CB |
578 | if (!f) { |
579 | /* Caller can check for EINVAL on 0. */ | |
580 | errno = EINVAL; | |
581 | return 0; | |
582 | } | |
583 | ||
584 | /* Note that the *scanf() argument supression requires that length | |
585 | * modifiers such as "l" are omitted. Otherwise some compilers will yell | |
586 | * at us. It's like telling someone you're not married and then asking | |
587 | * if you can bring your wife to the party. | |
588 | */ | |
589 | ret = fscanf(f, "%*d " /* (1) pid %d */ | |
590 | "%*s " /* (2) comm %s */ | |
591 | "%*c " /* (3) state %c */ | |
592 | "%*d " /* (4) ppid %d */ | |
593 | "%*d " /* (5) pgrp %d */ | |
594 | "%*d " /* (6) session %d */ | |
595 | "%*d " /* (7) tty_nr %d */ | |
596 | "%*d " /* (8) tpgid %d */ | |
597 | "%*u " /* (9) flags %u */ | |
598 | "%*u " /* (10) minflt %lu */ | |
599 | "%*u " /* (11) cminflt %lu */ | |
600 | "%*u " /* (12) majflt %lu */ | |
601 | "%*u " /* (13) cmajflt %lu */ | |
602 | "%*u " /* (14) utime %lu */ | |
603 | "%*u " /* (15) stime %lu */ | |
604 | "%*d " /* (16) cutime %ld */ | |
605 | "%*d " /* (17) cstime %ld */ | |
606 | "%*d " /* (18) priority %ld */ | |
607 | "%*d " /* (19) nice %ld */ | |
608 | "%*d " /* (20) num_threads %ld */ | |
609 | "%*d " /* (21) itrealvalue %ld */ | |
610 | "%" PRIu64, /* (22) starttime %llu */ | |
611 | &starttime); | |
12a60884 CB |
612 | if (ret != 1) |
613 | return ret_set_errno(0, EINVAL); | |
1f5596dd | 614 | |
12a60884 | 615 | return ret_set_errno(starttime, 0); |
1f5596dd CB |
616 | } |
617 | ||
/*
 * Return the start time of the init process that @pid maps to, converted
 * from clock ticks to seconds.
 *
 * Returns 0 if the start time cannot be read or the number of clock ticks
 * per second cannot be determined.
 */
static double get_reaper_start_time_in_sec(pid_t pid)
{
	uint64_t clockticks;
	long ticks_per_sec;

	clockticks = get_reaper_start_time(pid);
	if (clockticks == 0 && errno == EINVAL) {
		lxcfs_debug("failed to retrieve start time of pid %d\n", pid);
		return 0;
	}

	/*
	 * sysconf() reports failure as -1 and does not necessarily set errno.
	 * Any non-positive value is unusable as a divisor, so reject it
	 * regardless of errno.
	 */
	ticks_per_sec = sysconf(_SC_CLK_TCK);
	if (ticks_per_sec <= 0) {
		lxcfs_debug(
		    "%s\n",
		    "failed to determine number of clock ticks in a second");
		return 0;
	}

	return (double)clockticks / (double)ticks_per_sec;
}
642 | ||
/*
 * Return how long, in seconds, the init process that @pid maps to has been
 * running: time since boot minus the process start time since boot.
 *
 * If the start time is not positive it is returned unchanged; if the boot
 * clock cannot be read 0 is returned.
 */
static double get_reaper_age(pid_t pid)
{
	struct timespec since_boot;
	uint64_t uptime_ms;
	double start_sec;

	start_sec = get_reaper_start_time_in_sec(pid);
	if (!(start_sec > 0))
		return start_sec;

	if (clock_gettime(CLOCK_BOOTTIME, &since_boot) < 0)
		return 0;

	/* Uptime in whole milliseconds; the fractional part is dropped by
	 * the conversion to an integer.
	 */
	uptime_ms = (since_boot.tv_sec * 1000) + (since_boot.tv_nsec * 1e-6);

	return (uptime_ms - (start_sec * 1000)) / 1000;
}
673 | ||
674 | /* | |
675 | * We read /proc/uptime and reuse its second field. | |
676 | * For the first field, we use the mtime for the reaper for | |
677 | * the calling pid as returned by getreaperage | |
678 | */ | |
679 | static int proc_uptime_read(char *buf, size_t size, off_t offset, | |
680 | struct fuse_file_info *fi) | |
681 | { | |
682 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 683 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
684 | double busytime = get_reaper_busy(fc->pid); |
685 | char *cache = d->buf; | |
686 | ssize_t total_len = 0; | |
687 | double idletime, reaperage; | |
688 | ||
689 | #if RELOADTEST | |
690 | iwashere(); | |
691 | #endif | |
692 | ||
693 | if (offset){ | |
694 | if (!d->cached) | |
695 | return 0; | |
696 | if (offset > d->size) | |
697 | return -EINVAL; | |
698 | int left = d->size - offset; | |
699 | total_len = left > size ? size: left; | |
700 | memcpy(buf, cache + offset, total_len); | |
701 | return total_len; | |
702 | } | |
703 | ||
704 | reaperage = get_reaper_age(fc->pid); | |
705 | /* To understand why this is done, please read the comment to the | |
706 | * get_reaper_busy() function. | |
707 | */ | |
708 | idletime = reaperage; | |
709 | if (reaperage >= busytime) | |
710 | idletime = reaperage - busytime; | |
711 | ||
712 | total_len = snprintf(d->buf, d->buflen, "%.2lf %.2lf\n", reaperage, idletime); | |
713 | if (total_len < 0 || total_len >= d->buflen){ | |
714 | lxcfs_error("%s\n", "failed to write to cache"); | |
715 | return 0; | |
716 | } | |
717 | ||
718 | d->size = (int)total_len; | |
719 | d->cached = 1; | |
720 | ||
721 | if (total_len > size) total_len = size; | |
722 | ||
723 | memcpy(buf, d->buf, total_len); | |
724 | return total_len; | |
725 | } | |
726 | ||
727 | #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2) | |
728 | static int proc_stat_read(char *buf, size_t size, off_t offset, | |
729 | struct fuse_file_info *fi) | |
730 | { | |
731 | __do_free char *cg = NULL, *cpuset = NULL, *line = NULL; | |
757a63e7 | 732 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
733 | __do_free struct cpuacct_usage *cg_cpu_usage = NULL; |
734 | __do_fclose FILE *f = NULL; | |
735 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 736 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
737 | size_t linelen = 0, total_len = 0; |
738 | int curcpu = -1; /* cpu numbering starts at 0 */ | |
739 | int physcpu = 0; | |
740 | unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, | |
741 | irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0; | |
742 | unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, | |
743 | iowait_sum = 0, irq_sum = 0, softirq_sum = 0, | |
744 | steal_sum = 0, guest_sum = 0, guest_nice_sum = 0; | |
745 | char cpuall[CPUALL_MAX_SIZE]; | |
746 | /* reserve for cpu all */ | |
747 | char *cache = d->buf + CPUALL_MAX_SIZE; | |
748 | size_t cache_size = d->buflen - CPUALL_MAX_SIZE; | |
749 | int cg_cpu_usage_size = 0; | |
750 | ||
751 | if (offset){ | |
752 | if (offset > d->size) | |
753 | return -EINVAL; | |
754 | if (!d->cached) | |
755 | return 0; | |
756 | int left = d->size - offset; | |
757 | total_len = left > size ? size: left; | |
758 | memcpy(buf, d->buf + offset, total_len); | |
759 | return total_len; | |
760 | } | |
761 | ||
762 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
763 | lxcfs_v("initpid: %d\n", initpid); | |
764 | if (initpid <= 0) | |
765 | initpid = fc->pid; | |
766 | ||
767 | /* | |
768 | * when container run with host pid namespace initpid == 1, cgroup will "/" | |
769 | * we should return host os's /proc contents. | |
770 | * in some case cpuacct_usage.all in "/" will larger then /proc/stat | |
771 | */ | |
772 | if (initpid == 1) { | |
773 | return read_file_fuse("/proc/stat", buf, size, d); | |
774 | } | |
775 | ||
776 | cg = get_pid_cgroup(initpid, "cpuset"); | |
777 | lxcfs_v("cg: %s\n", cg); | |
778 | if (!cg) | |
779 | return read_file_fuse("/proc/stat", buf, size, d); | |
780 | prune_init_slice(cg); | |
781 | ||
782 | cpuset = get_cpuset(cg); | |
783 | if (!cpuset) | |
784 | return 0; | |
785 | ||
786 | /* | |
787 | * Read cpuacct.usage_all for all CPUs. | |
788 | * If the cpuacct cgroup is present, it is used to calculate the container's | |
789 | * CPU usage. If not, values from the host's /proc/stat are used. | |
790 | */ | |
2b8eff1d CB |
791 | if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage, &cg_cpu_usage_size) != 0) |
792 | lxcfs_v("%s\n", "proc_stat_read failed to read from cpuacct, falling back to the host's /proc/stat"); | |
1f5596dd | 793 | |
757a63e7 | 794 | f = fopen_cached("/proc/stat", "re", &fopen_cache); |
1f5596dd CB |
795 | if (!f) |
796 | return 0; | |
797 | ||
798 | //skip first line | |
799 | if (getline(&line, &linelen, f) < 0) { | |
800 | lxcfs_error("%s\n", "proc_stat_read read first line failed."); | |
801 | return 0; | |
802 | } | |
803 | ||
804 | if (cgroup_ops->can_use_cpuview(cgroup_ops) && cg_cpu_usage) { | |
805 | total_len = cpuview_proc_stat(cg, cpuset, cg_cpu_usage, cg_cpu_usage_size, | |
806 | f, d->buf, d->buflen); | |
807 | goto out; | |
808 | } | |
809 | ||
810 | while (getline(&line, &linelen, f) != -1) { | |
811 | ssize_t l; | |
812 | char cpu_char[10]; /* That's a lot of cores */ | |
813 | char *c; | |
814 | uint64_t all_used, cg_used, new_idle; | |
815 | int ret; | |
816 | ||
817 | if (strlen(line) == 0) | |
818 | continue; | |
819 | if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) { | |
820 | /* not a ^cpuN line containing a number N, just print it */ | |
821 | l = snprintf(cache, cache_size, "%s", line); | |
822 | if (l < 0) { | |
823 | perror("Error writing to cache"); | |
824 | return 0; | |
825 | } | |
826 | if (l >= cache_size) { | |
827 | lxcfs_error("%s\n", "Internal error: truncated write to cache."); | |
828 | return 0; | |
829 | } | |
830 | cache += l; | |
831 | cache_size -= l; | |
832 | total_len += l; | |
833 | continue; | |
834 | } | |
835 | ||
836 | if (sscanf(cpu_char, "%d", &physcpu) != 1) | |
837 | continue; | |
838 | if (!cpu_in_cpuset(physcpu, cpuset)) | |
839 | continue; | |
2b8eff1d | 840 | curcpu++; |
1f5596dd CB |
841 | |
842 | ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", | |
843 | &user, | |
844 | &nice, | |
845 | &system, | |
846 | &idle, | |
847 | &iowait, | |
848 | &irq, | |
849 | &softirq, | |
850 | &steal, | |
851 | &guest, | |
852 | &guest_nice); | |
853 | ||
854 | if (ret != 10 || !cg_cpu_usage) { | |
855 | c = strchr(line, ' '); | |
856 | if (!c) | |
857 | continue; | |
858 | l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c); | |
859 | if (l < 0) { | |
860 | perror("Error writing to cache"); | |
861 | return 0; | |
862 | ||
863 | } | |
864 | if (l >= cache_size) { | |
865 | lxcfs_error("%s\n", "Internal error: truncated write to cache."); | |
866 | return 0; | |
867 | } | |
868 | ||
869 | cache += l; | |
870 | cache_size -= l; | |
871 | total_len += l; | |
872 | ||
873 | if (ret != 10) | |
874 | continue; | |
875 | } | |
876 | ||
877 | if (cg_cpu_usage) { | |
878 | if (physcpu >= cg_cpu_usage_size) | |
879 | break; | |
880 | ||
881 | all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice; | |
882 | cg_used = cg_cpu_usage[physcpu].user + cg_cpu_usage[physcpu].system; | |
883 | ||
884 | if (all_used >= cg_used) { | |
885 | new_idle = idle + (all_used - cg_used); | |
886 | ||
887 | } else { | |
2b8eff1d CB |
888 | lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time", |
889 | curcpu, cg, all_used, cg_used); | |
1f5596dd CB |
890 | new_idle = idle; |
891 | } | |
892 | ||
2b8eff1d CB |
893 | l = snprintf(cache, cache_size, |
894 | "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n", | |
895 | curcpu, cg_cpu_usage[physcpu].user, | |
896 | cg_cpu_usage[physcpu].system, new_idle); | |
1f5596dd CB |
897 | |
898 | if (l < 0) { | |
899 | perror("Error writing to cache"); | |
900 | return 0; | |
901 | ||
902 | } | |
903 | if (l >= cache_size) { | |
904 | lxcfs_error("%s\n", "Internal error: truncated write to cache."); | |
905 | return 0; | |
906 | } | |
907 | ||
908 | cache += l; | |
909 | cache_size -= l; | |
910 | total_len += l; | |
911 | ||
912 | user_sum += cg_cpu_usage[physcpu].user; | |
913 | system_sum += cg_cpu_usage[physcpu].system; | |
914 | idle_sum += new_idle; | |
915 | ||
916 | } else { | |
917 | user_sum += user; | |
918 | nice_sum += nice; | |
919 | system_sum += system; | |
920 | idle_sum += idle; | |
921 | iowait_sum += iowait; | |
922 | irq_sum += irq; | |
923 | softirq_sum += softirq; | |
924 | steal_sum += steal; | |
925 | guest_sum += guest; | |
926 | guest_nice_sum += guest_nice; | |
927 | } | |
928 | } | |
929 | ||
930 | cache = d->buf; | |
931 | ||
932 | int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", | |
933 | user_sum, | |
934 | nice_sum, | |
935 | system_sum, | |
936 | idle_sum, | |
937 | iowait_sum, | |
938 | irq_sum, | |
939 | softirq_sum, | |
940 | steal_sum, | |
941 | guest_sum, | |
942 | guest_nice_sum); | |
943 | if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE) { | |
944 | memcpy(cache, cpuall, cpuall_len); | |
945 | cache += cpuall_len; | |
946 | } else { | |
947 | /* shouldn't happen */ | |
948 | lxcfs_error("proc_stat_read copy cpuall failed, cpuall_len=%d.", cpuall_len); | |
949 | cpuall_len = 0; | |
950 | } | |
951 | ||
952 | memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len); | |
953 | total_len += cpuall_len; | |
954 | ||
955 | out: | |
956 | d->cached = 1; | |
957 | d->size = total_len; | |
958 | if (total_len > size) | |
959 | total_len = size; | |
960 | ||
961 | memcpy(buf, d->buf, total_len); | |
962 | return total_len; | |
963 | } | |
964 | ||
965 | /* Note that "memory.stat" in cgroup2 is hierarchical by default. */ | |
acff9786 | 966 | static bool cgroup_parse_memory_stat(const char *cgroup, struct memory_stat *mstat) |
1f5596dd | 967 | { |
acff9786 CB |
968 | __do_close_prot_errno int fd = -EBADF; |
969 | __do_fclose FILE *f = NULL; | |
970 | __do_free char *line = NULL; | |
971 | bool unified; | |
972 | size_t len = 0; | |
973 | ssize_t linelen; | |
1f5596dd | 974 | |
acff9786 CB |
975 | fd = cgroup_ops->get_memory_stats_fd(cgroup_ops, cgroup); |
976 | if (fd < 0) | |
977 | return false; | |
978 | ||
979 | f = fdopen(fd, "re"); | |
980 | if (!f) | |
981 | return false; | |
982 | /* Transferring ownership to fdopen(). */ | |
983 | move_fd(fd); | |
984 | ||
985 | unified = pure_unified_layout(cgroup_ops); | |
986 | while ((linelen = getline(&line, &len, f)) != -1) { | |
987 | if (!unified && startswith(line, "hierarchical_memory_limit")) { | |
988 | sscanf(line, "hierarchical_memory_limit %" PRIu64, &(mstat->hierarchical_memory_limit)); | |
989 | } else if (!unified && startswith(line, "hierarchical_memsw_limit")) { | |
990 | sscanf(line, "hierarchical_memsw_limit %" PRIu64, &(mstat->hierarchical_memsw_limit)); | |
91d63a9e IM |
991 | } else if (startswith(line, unified ? "file" :"total_cache")) { |
992 | sscanf(line, unified ? "file %" PRIu64 : "total_cache %" PRIu64, &(mstat->total_cache)); | |
acff9786 CB |
993 | } else if (!unified && startswith(line, "total_rss")) { |
994 | sscanf(line, "total_rss %" PRIu64, &(mstat->total_rss)); | |
995 | } else if (!unified && startswith(line, "total_rss_huge")) { | |
996 | sscanf(line, "total_rss_huge %" PRIu64, &(mstat->total_rss_huge)); | |
997 | } else if (startswith(line, unified ? "shmem" : "total_shmem")) { | |
998 | sscanf(line, unified ? "shmem %" PRIu64 : "total_shmem %" PRIu64, &(mstat->total_shmem)); | |
999 | } else if (startswith(line, unified ? "file_mapped" : "total_mapped_file")) { | |
1000 | sscanf(line, unified ? "file_mapped %" PRIu64 : "total_mapped_file %" PRIu64, &(mstat->total_mapped_file)); | |
1001 | } else if (!unified && startswith(line, "total_dirty")) { | |
1002 | sscanf(line, "total_dirty %" PRIu64, &(mstat->total_dirty)); | |
1003 | } else if (!unified && startswith(line, "total_writeback")) { | |
1004 | sscanf(line, "total_writeback %" PRIu64, &(mstat->total_writeback)); | |
1005 | } else if (!unified && startswith(line, "total_swap")) { | |
1006 | sscanf(line, "total_swap %" PRIu64, &(mstat->total_swap)); | |
1007 | } else if (!unified && startswith(line, "total_pgpgin")) { | |
1008 | sscanf(line, "total_pgpgin %" PRIu64, &(mstat->total_pgpgin)); | |
1009 | } else if (!unified && startswith(line, "total_pgpgout")) { | |
1010 | sscanf(line, "total_pgpgout %" PRIu64, &(mstat->total_pgpgout)); | |
1011 | } else if (startswith(line, unified ? "pgfault" : "total_pgfault")) { | |
1012 | sscanf(line, unified ? "pgfault %" PRIu64 : "total_pgfault %" PRIu64, &(mstat->total_pgfault)); | |
1013 | } else if (startswith(line, unified ? "pgmajfault" : "total_pgmajfault")) { | |
1014 | sscanf(line, unified ? "pgmajfault %" PRIu64 : "total_pgmajfault %" PRIu64, &(mstat->total_pgmajfault)); | |
1015 | } else if (startswith(line, unified ? "inactive_anon" : "total_inactive_anon")) { | |
1016 | sscanf(line, unified ? "inactive_anon %" PRIu64 : "total_inactive_anon %" PRIu64, &(mstat->total_inactive_anon)); | |
1017 | } else if (startswith(line, unified ? "active_anon" : "total_active_anon")) { | |
1018 | sscanf(line, unified ? "active_anon %" PRIu64 : "total_active_anon %" PRIu64, &(mstat->total_active_anon)); | |
1019 | } else if (startswith(line, unified ? "inactive_file" : "total_inactive_file")) { | |
1020 | sscanf(line, unified ? "inactive_file %" PRIu64 : "total_inactive_file %" PRIu64, &(mstat->total_inactive_file)); | |
1021 | } else if (startswith(line, unified ? "active_file" : "total_active_file")) { | |
1022 | sscanf(line, unified ? "active_file %" PRIu64 : "total_active_file %" PRIu64, &(mstat->total_active_file)); | |
1023 | } else if (startswith(line, unified ? "unevictable" : "total_unevictable")) { | |
1024 | sscanf(line, unified ? "unevictable %" PRIu64 : "total_unevictable %" PRIu64, &(mstat->total_unevictable)); | |
1f5596dd | 1025 | } |
1f5596dd | 1026 | } |
acff9786 CB |
1027 | |
1028 | return true; | |
1f5596dd CB |
1029 | } |
1030 | ||
1f5596dd CB |
1031 | static int proc_meminfo_read(char *buf, size_t size, off_t offset, |
1032 | struct fuse_file_info *fi) | |
1033 | { | |
1034 | __do_free char *cgroup = NULL, *line = NULL, | |
1035 | *memusage_str = NULL, *memstat_str = NULL, | |
1036 | *memswlimit_str = NULL, *memswusage_str = NULL; | |
757a63e7 | 1037 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
1038 | __do_fclose FILE *f = NULL; |
1039 | struct fuse_context *fc = fuse_get_context(); | |
9973cc06 | 1040 | struct lxcfs_opts *opts = (struct lxcfs_opts *)fuse_get_context()->private_data; |
99b183fb | 1041 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
acff9786 CB |
1042 | uint64_t memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0, |
1043 | hosttotal = 0; | |
334a14f9 | 1044 | struct memory_stat mstat = {}; |
1f5596dd CB |
1045 | size_t linelen = 0, total_len = 0; |
1046 | char *cache = d->buf; | |
1047 | size_t cache_size = d->buflen; | |
1048 | int ret; | |
1049 | ||
1050 | if (offset) { | |
1051 | int left; | |
1052 | ||
1053 | if (offset > d->size) | |
1054 | return -EINVAL; | |
1055 | ||
1056 | if (!d->cached) | |
1057 | return 0; | |
1058 | ||
1059 | left = d->size - offset; | |
1060 | total_len = left > size ? size : left; | |
1061 | memcpy(buf, cache + offset, total_len); | |
1062 | ||
1063 | return total_len; | |
1064 | } | |
1065 | ||
1066 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
1067 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
1068 | initpid = fc->pid; | |
1069 | ||
1070 | cgroup = get_pid_cgroup(initpid, "memory"); | |
1071 | if (!cgroup) | |
1072 | return read_file_fuse("/proc/meminfo", buf, size, d); | |
1073 | ||
1074 | prune_init_slice(cgroup); | |
1075 | ||
1076 | memlimit = get_min_memlimit(cgroup, false); | |
1077 | ||
1078 | ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str); | |
1079 | if (ret < 0) | |
1080 | return 0; | |
1081 | ||
acff9786 | 1082 | if (!cgroup_parse_memory_stat(cgroup, &mstat)) |
1f5596dd | 1083 | return 0; |
1f5596dd CB |
1084 | |
1085 | /* | |
1086 | * Following values are allowed to fail, because swapaccount might be | |
1087 | * turned off for current kernel. | |
1088 | */ | |
1089 | ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cgroup, &memswlimit_str); | |
1090 | if (ret >= 0) | |
1091 | ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str); | |
1092 | if (ret >= 0) { | |
1093 | memswlimit = get_min_memlimit(cgroup, true); | |
1094 | memswusage = strtoul(memswusage_str, NULL, 10); | |
1095 | memswlimit = memswlimit / 1024; | |
1096 | memswusage = memswusage / 1024; | |
1097 | } | |
1098 | ||
1099 | memusage = strtoul(memusage_str, NULL, 10); | |
1100 | memlimit /= 1024; | |
1101 | memusage /= 1024; | |
1102 | ||
757a63e7 | 1103 | f = fopen_cached("/proc/meminfo", "re", &fopen_cache); |
1f5596dd CB |
1104 | if (!f) |
1105 | return 0; | |
1106 | ||
1107 | while (getline(&line, &linelen, f) != -1) { | |
1108 | ssize_t l; | |
1109 | char *printme, lbuf[100]; | |
1110 | ||
1111 | memset(lbuf, 0, 100); | |
1112 | if (startswith(line, "MemTotal:")) { | |
acff9786 | 1113 | sscanf(line+sizeof("MemTotal:")-1, "%" PRIu64, &hosttotal); |
1f5596dd CB |
1114 | if (hosttotal < memlimit) |
1115 | memlimit = hosttotal; | |
acff9786 | 1116 | snprintf(lbuf, 100, "MemTotal: %8" PRIu64 " kB\n", memlimit); |
1f5596dd CB |
1117 | printme = lbuf; |
1118 | } else if (startswith(line, "MemFree:")) { | |
acff9786 | 1119 | snprintf(lbuf, 100, "MemFree: %8" PRIu64 " kB\n", memlimit - memusage); |
1f5596dd CB |
1120 | printme = lbuf; |
1121 | } else if (startswith(line, "MemAvailable:")) { | |
acff9786 | 1122 | snprintf(lbuf, 100, "MemAvailable: %8" PRIu64 " kB\n", memlimit - memusage + mstat.total_cache / 1024); |
1f5596dd CB |
1123 | printme = lbuf; |
1124 | } else if (startswith(line, "SwapTotal:") && memswlimit > 0 && | |
1125 | opts && opts->swap_off == false) { | |
b7b548fe CB |
1126 | memswlimit -= memlimit; |
1127 | snprintf(lbuf, 100, "SwapTotal: %8" PRIu64 " kB\n", memswlimit); | |
1f5596dd CB |
1128 | printme = lbuf; |
1129 | } else if (startswith(line, "SwapTotal:") && opts && opts->swap_off == true) { | |
acff9786 | 1130 | snprintf(lbuf, 100, "SwapTotal: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1131 | printme = lbuf; |
1132 | } else if (startswith(line, "SwapFree:") && memswlimit > 0 && | |
1133 | memswusage > 0 && opts && opts->swap_off == false) { | |
acff9786 CB |
1134 | uint64_t swaptotal = memswlimit, |
1135 | swapusage = memusage > memswusage | |
1136 | ? 0 | |
1137 | : memswusage - memusage, | |
1138 | swapfree = swapusage < swaptotal | |
1139 | ? swaptotal - swapusage | |
1140 | : 0; | |
1141 | snprintf(lbuf, 100, "SwapFree: %8" PRIu64 " kB\n", swapfree); | |
1f5596dd CB |
1142 | printme = lbuf; |
1143 | } else if (startswith(line, "SwapFree:") && opts && opts->swap_off == true) { | |
acff9786 | 1144 | snprintf(lbuf, 100, "SwapFree: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1145 | printme = lbuf; |
1146 | } else if (startswith(line, "Slab:")) { | |
acff9786 | 1147 | snprintf(lbuf, 100, "Slab: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1148 | printme = lbuf; |
1149 | } else if (startswith(line, "Buffers:")) { | |
acff9786 | 1150 | snprintf(lbuf, 100, "Buffers: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1151 | printme = lbuf; |
1152 | } else if (startswith(line, "Cached:")) { | |
acff9786 CB |
1153 | snprintf(lbuf, 100, "Cached: %8" PRIu64 " kB\n", |
1154 | mstat.total_cache / 1024); | |
1f5596dd CB |
1155 | printme = lbuf; |
1156 | } else if (startswith(line, "SwapCached:")) { | |
acff9786 | 1157 | snprintf(lbuf, 100, "SwapCached: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1158 | printme = lbuf; |
1159 | } else if (startswith(line, "Active:")) { | |
acff9786 CB |
1160 | snprintf(lbuf, 100, "Active: %8" PRIu64 " kB\n", |
1161 | (mstat.total_active_anon + | |
1162 | mstat.total_active_file) / | |
1163 | 1024); | |
1f5596dd CB |
1164 | printme = lbuf; |
1165 | } else if (startswith(line, "Inactive:")) { | |
acff9786 CB |
1166 | snprintf(lbuf, 100, "Inactive: %8" PRIu64 " kB\n", |
1167 | (mstat.total_inactive_anon + | |
1168 | mstat.total_inactive_file) / | |
1169 | 1024); | |
1f5596dd CB |
1170 | printme = lbuf; |
1171 | } else if (startswith(line, "Active(anon)")) { | |
acff9786 CB |
1172 | snprintf(lbuf, 100, "Active(anon): %8" PRIu64 " kB\n", |
1173 | mstat.total_active_anon / 1024); | |
1f5596dd CB |
1174 | printme = lbuf; |
1175 | } else if (startswith(line, "Inactive(anon)")) { | |
acff9786 CB |
1176 | snprintf(lbuf, 100, "Inactive(anon): %8" PRIu64 " kB\n", |
1177 | mstat.total_inactive_anon / 1024); | |
1f5596dd CB |
1178 | printme = lbuf; |
1179 | } else if (startswith(line, "Active(file)")) { | |
acff9786 CB |
1180 | snprintf(lbuf, 100, "Active(file): %8" PRIu64 " kB\n", |
1181 | mstat.total_active_file / 1024); | |
1f5596dd CB |
1182 | printme = lbuf; |
1183 | } else if (startswith(line, "Inactive(file)")) { | |
acff9786 CB |
1184 | snprintf(lbuf, 100, "Inactive(file): %8" PRIu64 " kB\n", |
1185 | mstat.total_inactive_file / 1024); | |
1f5596dd CB |
1186 | printme = lbuf; |
1187 | } else if (startswith(line, "Unevictable")) { | |
acff9786 CB |
1188 | snprintf(lbuf, 100, "Unevictable: %8" PRIu64 " kB\n", |
1189 | mstat.total_unevictable / 1024); | |
1190 | printme = lbuf; | |
1191 | } else if (startswith(line, "Dirty")) { | |
1192 | snprintf(lbuf, 100, "Dirty: %8" PRIu64 " kB\n", | |
1193 | mstat.total_dirty / 1024); | |
1194 | printme = lbuf; | |
1195 | } else if (startswith(line, "Writeback")) { | |
1196 | snprintf(lbuf, 100, "Writeback: %8" PRIu64 " kB\n", | |
1197 | mstat.total_writeback / 1024); | |
1198 | printme = lbuf; | |
1199 | } else if (startswith(line, "AnonPages")) { | |
1200 | snprintf(lbuf, 100, "AnonPages: %8" PRIu64 " kB\n", | |
1201 | (mstat.total_active_anon + | |
1202 | mstat.total_inactive_anon - mstat.total_shmem) / | |
1203 | 1024); | |
1204 | printme = lbuf; | |
1205 | } else if (startswith(line, "Mapped")) { | |
1206 | snprintf(lbuf, 100, "Mapped: %8" PRIu64 " kB\n", | |
1207 | mstat.total_mapped_file / 1024); | |
1f5596dd CB |
1208 | printme = lbuf; |
1209 | } else if (startswith(line, "SReclaimable")) { | |
acff9786 | 1210 | snprintf(lbuf, 100, "SReclaimable: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1211 | printme = lbuf; |
1212 | } else if (startswith(line, "SUnreclaim")) { | |
acff9786 | 1213 | snprintf(lbuf, 100, "SUnreclaim: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1214 | printme = lbuf; |
1215 | } else if (startswith(line, "Shmem:")) { | |
acff9786 CB |
1216 | snprintf(lbuf, 100, "Shmem: %8" PRIu64 " kB\n", |
1217 | mstat.total_shmem / 1024); | |
1f5596dd CB |
1218 | printme = lbuf; |
1219 | } else if (startswith(line, "ShmemHugePages")) { | |
acff9786 | 1220 | snprintf(lbuf, 100, "ShmemHugePages: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1221 | printme = lbuf; |
1222 | } else if (startswith(line, "ShmemPmdMapped")) { | |
acff9786 CB |
1223 | snprintf(lbuf, 100, "ShmemPmdMapped: %8" PRIu64 " kB\n", (uint64_t)0); |
1224 | printme = lbuf; | |
1225 | } else if (startswith(line, "AnonHugePages")) { | |
1226 | snprintf(lbuf, 100, "AnonHugePages: %8" PRIu64 " kB\n", | |
1227 | mstat.total_rss_huge / 1024); | |
1f5596dd | 1228 | printme = lbuf; |
acff9786 CB |
1229 | } else { |
1230 | printme = line; | |
1231 | } | |
1f5596dd CB |
1232 | |
1233 | l = snprintf(cache, cache_size, "%s", printme); | |
1234 | if (l < 0) { | |
1235 | perror("Error writing to cache"); | |
1236 | return 0; | |
1237 | ||
1238 | } | |
1239 | if (l >= cache_size) { | |
1240 | lxcfs_error("%s\n", "Internal error: truncated write to cache."); | |
1241 | return 0; | |
1242 | } | |
1243 | ||
1244 | cache += l; | |
1245 | cache_size -= l; | |
1246 | total_len += l; | |
1247 | } | |
1248 | ||
1249 | d->cached = 1; | |
1250 | d->size = total_len; | |
1251 | if (total_len > size ) total_len = size; | |
1252 | memcpy(buf, d->buf, total_len); | |
1253 | ||
1254 | return total_len; | |
1255 | } | |
1256 | ||
1257 | int proc_read(const char *path, char *buf, size_t size, off_t offset, | |
99b183fb | 1258 | struct fuse_file_info *fi) |
1f5596dd | 1259 | { |
99b183fb | 1260 | struct file_info *f = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
1261 | |
1262 | switch (f->type) { | |
1263 | case LXC_TYPE_PROC_MEMINFO: | |
1264 | return proc_meminfo_read(buf, size, offset, fi); | |
1265 | case LXC_TYPE_PROC_CPUINFO: | |
1266 | return proc_cpuinfo_read(buf, size, offset, fi); | |
1267 | case LXC_TYPE_PROC_UPTIME: | |
1268 | return proc_uptime_read(buf, size, offset, fi); | |
1269 | case LXC_TYPE_PROC_STAT: | |
1270 | return proc_stat_read(buf, size, offset, fi); | |
1271 | case LXC_TYPE_PROC_DISKSTATS: | |
1272 | return proc_diskstats_read(buf, size, offset, fi); | |
1273 | case LXC_TYPE_PROC_SWAPS: | |
1274 | return proc_swaps_read(buf, size, offset, fi); | |
1275 | case LXC_TYPE_PROC_LOADAVG: | |
1276 | return proc_loadavg_read(buf, size, offset, fi); | |
1f5596dd | 1277 | } |
99b183fb CB |
1278 | |
1279 | return -EINVAL; | |
1f5596dd | 1280 | } |