]>
Commit | Line | Data |
---|---|---|
db0463bf | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
1f5596dd | 2 | |
f834b6bf SP |
3 | #include "config.h" |
4 | ||
1f5596dd CB |
5 | #include <dirent.h> |
6 | #include <errno.h> | |
7 | #include <fcntl.h> | |
1f5596dd CB |
8 | #include <inttypes.h> |
9 | #include <libgen.h> | |
10 | #include <pthread.h> | |
11 | #include <sched.h> | |
12 | #include <stdarg.h> | |
13 | #include <stdbool.h> | |
14 | #include <stdint.h> | |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <string.h> | |
18 | #include <time.h> | |
19 | #include <unistd.h> | |
20 | #include <wait.h> | |
21 | #include <linux/magic.h> | |
22 | #include <linux/sched.h> | |
23 | #include <sys/epoll.h> | |
24 | #include <sys/mman.h> | |
25 | #include <sys/mount.h> | |
26 | #include <sys/param.h> | |
27 | #include <sys/socket.h> | |
28 | #include <sys/syscall.h> | |
29 | #include <sys/sysinfo.h> | |
30 | #include <sys/vfs.h> | |
31 | ||
e01afbb7 CB |
32 | #include "proc_fuse.h" |
33 | ||
1f5596dd | 34 | #include "bindings.h" |
1f5596dd CB |
35 | #include "cgroup_fuse.h" |
36 | #include "cgroups/cgroup.h" | |
37 | #include "cgroups/cgroup_utils.h" | |
4ec5c9da | 38 | #include "cpuset_parse.h" |
ec2043ed | 39 | #include "lxcfs_fuse_compat.h" |
1f5596dd CB |
40 | #include "memory_utils.h" |
41 | #include "proc_loadavg.h" | |
4ec5c9da | 42 | #include "proc_cpuview.h" |
1f5596dd CB |
43 | #include "utils.h" |
44 | ||
/*
 * Plain container of unsigned 64-bit memory counters.
 *
 * NOTE(review): the member names look like keys of a cgroup memory.stat
 * file; the code that fills this struct is not visible here - confirm.
 */
struct memory_stat {
	/* hierarchical limits */
	uint64_t hierarchical_memory_limit;
	uint64_t hierarchical_memsw_limit;

	/* usage totals */
	uint64_t total_cache;
	uint64_t total_rss;
	uint64_t total_rss_huge;
	uint64_t total_shmem;
	uint64_t total_mapped_file;
	uint64_t total_dirty;
	uint64_t total_writeback;
	uint64_t total_swap;

	/* paging event counters */
	uint64_t total_pgpgin;
	uint64_t total_pgpgout;
	uint64_t total_pgfault;
	uint64_t total_pgmajfault;

	/* per-list totals */
	uint64_t total_inactive_anon;
	uint64_t total_active_anon;
	uint64_t total_inactive_file;
	uint64_t total_active_file;
	uint64_t total_unevictable;
};
66 | ||
ce554964 SP |
67 | static off_t get_procfile_size(const char *path) |
68 | { | |
69 | __do_fclose FILE *f = NULL; | |
70 | __do_free char *line = NULL; | |
71 | size_t len = 0; | |
72 | ssize_t sz, answer = 0; | |
73 | ||
74 | f = fopen(path, "re"); | |
75 | if (!f) | |
76 | return 0; | |
77 | ||
78 | while ((sz = getline(&line, &len, f)) != -1) | |
79 | answer += sz; | |
80 | ||
81 | return answer; | |
82 | } | |
83 | ||
2d7bcab7 | 84 | __lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb) |
1f5596dd CB |
85 | { |
86 | struct timespec now; | |
87 | ||
88 | memset(sb, 0, sizeof(struct stat)); | |
89 | if (clock_gettime(CLOCK_REALTIME, &now) < 0) | |
90 | return -EINVAL; | |
f75d5b75 | 91 | |
1f5596dd CB |
92 | sb->st_uid = sb->st_gid = 0; |
93 | sb->st_atim = sb->st_mtim = sb->st_ctim = now; | |
94 | if (strcmp(path, "/proc") == 0) { | |
95 | sb->st_mode = S_IFDIR | 00555; | |
96 | sb->st_nlink = 2; | |
97 | return 0; | |
98 | } | |
f75d5b75 CB |
99 | |
100 | if (strcmp(path, "/proc/meminfo") == 0 || | |
101 | strcmp(path, "/proc/cpuinfo") == 0 || | |
102 | strcmp(path, "/proc/uptime") == 0 || | |
103 | strcmp(path, "/proc/stat") == 0 || | |
104 | strcmp(path, "/proc/diskstats") == 0 || | |
105 | strcmp(path, "/proc/swaps") == 0 || | |
6cc153e6 FS |
106 | strcmp(path, "/proc/loadavg") == 0 || |
107 | strcmp(path, "/proc/slabinfo") == 0) { | |
ce554964 | 108 | sb->st_size = get_procfile_size(path); |
1f5596dd CB |
109 | sb->st_mode = S_IFREG | 00444; |
110 | sb->st_nlink = 1; | |
111 | return 0; | |
112 | } | |
113 | ||
114 | return -ENOENT; | |
115 | } | |
116 | ||
2d7bcab7 CB |
117 | __lxcfs_fuse_ops int proc_readdir(const char *path, void *buf, |
118 | fuse_fill_dir_t filler, off_t offset, | |
119 | struct fuse_file_info *fi) | |
1f5596dd | 120 | { |
f834b6bf SP |
121 | if (DIR_FILLER(filler, buf, ".", NULL, 0) != 0 || |
122 | DIR_FILLER(filler, buf, "..", NULL, 0) != 0 || | |
123 | DIR_FILLER(filler, buf, "cpuinfo", NULL, 0) != 0 || | |
124 | DIR_FILLER(filler, buf, "meminfo", NULL, 0) != 0 || | |
125 | DIR_FILLER(filler, buf, "stat", NULL, 0) != 0 || | |
126 | DIR_FILLER(filler, buf, "uptime", NULL, 0) != 0 || | |
127 | DIR_FILLER(filler, buf, "diskstats", NULL, 0) != 0 || | |
128 | DIR_FILLER(filler, buf, "swaps", NULL, 0) != 0 || | |
6cc153e6 FS |
129 | DIR_FILLER(filler, buf, "loadavg", NULL, 0) != 0 || |
130 | DIR_FILLER(filler, buf, "slabinfo", NULL, 0) != 0) | |
1f5596dd CB |
131 | return -EINVAL; |
132 | ||
133 | return 0; | |
134 | } | |
135 | ||
2d7bcab7 | 136 | __lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi) |
1f5596dd | 137 | { |
700dd417 | 138 | __do_free struct file_info *info = NULL; |
1f5596dd | 139 | int type = -1; |
1f5596dd CB |
140 | |
141 | if (strcmp(path, "/proc/meminfo") == 0) | |
142 | type = LXC_TYPE_PROC_MEMINFO; | |
143 | else if (strcmp(path, "/proc/cpuinfo") == 0) | |
144 | type = LXC_TYPE_PROC_CPUINFO; | |
145 | else if (strcmp(path, "/proc/uptime") == 0) | |
146 | type = LXC_TYPE_PROC_UPTIME; | |
147 | else if (strcmp(path, "/proc/stat") == 0) | |
148 | type = LXC_TYPE_PROC_STAT; | |
149 | else if (strcmp(path, "/proc/diskstats") == 0) | |
150 | type = LXC_TYPE_PROC_DISKSTATS; | |
151 | else if (strcmp(path, "/proc/swaps") == 0) | |
152 | type = LXC_TYPE_PROC_SWAPS; | |
153 | else if (strcmp(path, "/proc/loadavg") == 0) | |
154 | type = LXC_TYPE_PROC_LOADAVG; | |
6cc153e6 FS |
155 | else if (strcmp(path, "/proc/slabinfo") == 0) |
156 | type = LXC_TYPE_PROC_SLABINFO; | |
1f5596dd CB |
157 | if (type == -1) |
158 | return -ENOENT; | |
159 | ||
f1a33645 | 160 | info = zalloc(sizeof(*info)); |
1f5596dd CB |
161 | if (!info) |
162 | return -ENOMEM; | |
163 | ||
1f5596dd CB |
164 | info->type = type; |
165 | ||
166 | info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE; | |
167 | ||
f1a33645 | 168 | info->buf = zalloc(info->buflen); |
1f5596dd CB |
169 | if (!info->buf) |
170 | return -ENOMEM; | |
1f5596dd CB |
171 | /* set actual size to buffer size */ |
172 | info->size = info->buflen; | |
173 | ||
700dd417 | 174 | fi->fh = PTR_TO_UINT64(move_ptr(info)); |
1f5596dd CB |
175 | return 0; |
176 | } | |
177 | ||
2d7bcab7 | 178 | __lxcfs_fuse_ops int proc_access(const char *path, int mask) |
1f5596dd CB |
179 | { |
180 | if (strcmp(path, "/proc") == 0 && access(path, R_OK) == 0) | |
181 | return 0; | |
182 | ||
183 | /* these are all read-only */ | |
184 | if ((mask & ~R_OK) != 0) | |
185 | return -EACCES; | |
f75d5b75 | 186 | |
1f5596dd CB |
187 | return 0; |
188 | } | |
189 | ||
2d7bcab7 | 190 | __lxcfs_fuse_ops int proc_release(const char *path, struct fuse_file_info *fi) |
1f5596dd CB |
191 | { |
192 | do_release_file_info(fi); | |
193 | return 0; | |
194 | } | |
195 | ||
e9712933 | 196 | static uint64_t get_memlimit(const char *cgroup, bool swap) |
1f5596dd | 197 | { |
1f5596dd | 198 | __do_free char *memlimit_str = NULL; |
c83158f2 | 199 | uint64_t memlimit = 0; |
f75d5b75 | 200 | int ret; |
1f5596dd CB |
201 | |
202 | if (swap) | |
203 | ret = cgroup_ops->get_memory_swap_max(cgroup_ops, cgroup, &memlimit_str); | |
204 | else | |
205 | ret = cgroup_ops->get_memory_max(cgroup_ops, cgroup, &memlimit_str); | |
ee1a885f | 206 | if (ret > 0 && memlimit_str[0] && safe_uint64(memlimit_str, &memlimit, 10) < 0) |
c83158f2 | 207 | lxcfs_error("Failed to convert memlimit %s", memlimit_str); |
1f5596dd CB |
208 | |
209 | return memlimit; | |
210 | } | |
211 | ||
/*
 * dirname() with GNU semantics, adapted from glibc-2.32. It exists because a
 * POSIX dirname("/some-dir") will return "/some-dir" as opposed to "/", which
 * breaks `get_min_memlimit()`.
 *
 * @path is shortened in place and returned. When @path is NULL or has no
 * directory part, a pointer to a static "." is returned instead and @path is
 * left untouched.
 */
static char *gnu_dirname(char *path)
{
	static const char dot[] = ".";
	char *slash, *scan;

	slash = path ? strrchr(path, '/') : NULL;

	if (slash && slash != path && slash[1] == '\0') {
		/* The path ends in '/': step back over the whole trailing run. */
		scan = slash;
		while (scan != path && scan[-1] == '/')
			--scan;

		/*
		 * Something other than slashes precedes the run, so look for
		 * the slash in front of that last component.
		 */
		if (scan != path) {
			slash = NULL;
			while (scan != path) {
				--scan;
				if (*scan == '/') {
					slash = scan;
					break;
				}
			}
		}
	}

	/*
	 * XPG requires "." when there is no directory part, hence the static
	 * string handed out through a non-const pointer.
	 */
	if (!slash)
		return (char *)dot;

	/* Collapse the run of slashes that ends at the separator. */
	scan = slash;
	while (scan != path && scan[-1] == '/')
		--scan;

	if (scan != path) {
		slash = scan;
	} else if (slash == path + 1) {
		/*
		 * Exactly two leading slashes: "//" has to be preserved, see
		 * XBD 4.10 Path Name Resolution.
		 */
		++slash;
	} else {
		/* Only slashes in front of the component: the result is "/". */
		slash = path + 1;
	}

	*slash = '\0';

	return path;
}
273 | ||
e9712933 | 274 | static uint64_t get_min_memlimit(const char *cgroup, bool swap) |
1f5596dd CB |
275 | { |
276 | __do_free char *copy = NULL; | |
c83158f2 | 277 | uint64_t memlimit = 0, retlimit = 0; |
1f5596dd CB |
278 | |
279 | copy = strdup(cgroup); | |
f75d5b75 CB |
280 | if (!copy) |
281 | return log_error_errno(0, ENOMEM, "Failed to allocate memory"); | |
282 | ||
1f5596dd CB |
283 | retlimit = get_memlimit(copy, swap); |
284 | ||
6f88ab0c JS |
285 | /* |
286 | * If the cgroup doesn't start with / (probably won't happen), dirname() | |
287 | * will terminate with "" instead of "/" | |
288 | */ | |
289 | while (*copy && strcmp(copy, "/") != 0) { | |
1f5596dd CB |
290 | char *it = copy; |
291 | ||
6f88ab0c | 292 | it = gnu_dirname(it); |
1f5596dd | 293 | memlimit = get_memlimit(it, swap); |
c83158f2 | 294 | if (memlimit > 0 && memlimit < retlimit) |
1f5596dd CB |
295 | retlimit = memlimit; |
296 | }; | |
297 | ||
298 | return retlimit; | |
299 | } | |
300 | ||
/* Returns true when @line begins with @pref (an empty @pref always matches). */
static inline bool startswith(const char *line, const char *pref)
{
	while (*pref) {
		if (*line++ != *pref++)
			return false;
	}

	return true;
}
305 | ||
1f5596dd CB |
306 | static int proc_swaps_read(char *buf, size_t size, off_t offset, |
307 | struct fuse_file_info *fi) | |
308 | { | |
362d1193 SG |
309 | __do_free char *cgroup = NULL, *memusage_str = NULL, |
310 | *memswusage_str = NULL, *memswpriority_str = NULL; | |
1f5596dd | 311 | struct fuse_context *fc = fuse_get_context(); |
84e184b1 | 312 | bool wants_swap = lxcfs_has_opt(fuse_get_context()->private_data, LXCFS_SWAP_ON); |
99b183fb | 313 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
e9712933 | 314 | uint64_t memswlimit = 0, memlimit = 0, memusage = 0, memswusage = 0, |
79af0cd1 | 315 | swtotal = 0, swusage = 0, memswpriority = 1, |
3ce66074 | 316 | hostswtotal = 0, hostswfree = 0; |
1f5596dd CB |
317 | ssize_t total_len = 0; |
318 | ssize_t l = 0; | |
319 | char *cache = d->buf; | |
320 | int ret; | |
3ce66074 SG |
321 | __do_free char *line = NULL; |
322 | __do_free void *fopen_cache = NULL; | |
323 | __do_fclose FILE *f = NULL; | |
324 | size_t linelen = 0; | |
1f5596dd CB |
325 | |
326 | if (offset) { | |
3cf1e562 | 327 | size_t left; |
1f5596dd CB |
328 | |
329 | if (offset > d->size) | |
330 | return -EINVAL; | |
331 | ||
332 | if (!d->cached) | |
333 | return 0; | |
334 | ||
335 | left = d->size - offset; | |
336 | total_len = left > size ? size: left; | |
337 | memcpy(buf, cache + offset, total_len); | |
338 | ||
339 | return total_len; | |
340 | } | |
341 | ||
342 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
343 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
344 | initpid = fc->pid; | |
a9f0d623 | 345 | |
b7b018d0 CB |
346 | cgroup = get_pid_cgroup(initpid, "memory"); |
347 | if (!cgroup) | |
1f5596dd | 348 | return read_file_fuse("/proc/swaps", buf, size, d); |
b7b018d0 | 349 | prune_init_slice(cgroup); |
1f5596dd | 350 | |
b7b018d0 | 351 | memlimit = get_min_memlimit(cgroup, false); |
1f5596dd | 352 | |
b7b018d0 | 353 | ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str); |
1f5596dd CB |
354 | if (ret < 0) |
355 | return 0; | |
356 | ||
c83158f2 CB |
357 | if (safe_uint64(memusage_str, &memusage, 10) < 0) |
358 | lxcfs_error("Failed to convert memusage %s", memusage_str); | |
1f5596dd | 359 | |
2f2080c1 | 360 | if (wants_swap) { |
b7b018d0 CB |
361 | memswlimit = get_min_memlimit(cgroup, true); |
362 | if (memswlimit > 0) { | |
363 | ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str); | |
364 | if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) { | |
365 | if (memlimit > memswlimit) | |
366 | swtotal = 0; | |
367 | else | |
368 | swtotal = (memswlimit - memlimit) / 1024; | |
369 | if (memusage > memswusage || swtotal == 0) | |
370 | swusage = 0; | |
371 | else | |
372 | swusage = (memswusage - memusage) / 1024; | |
b7b018d0 | 373 | } |
362d1193 SG |
374 | |
375 | ret = cgroup_ops->get_memory_swappiness(cgroup_ops, cgroup, &memswpriority_str); | |
376 | if (ret >= 0) | |
377 | safe_uint64(memswpriority_str, &memswpriority, 10); | |
2f2080c1 | 378 | } |
1f5596dd CB |
379 | } |
380 | ||
381 | total_len = snprintf(d->buf, d->size, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n"); | |
382 | ||
3ce66074 SG |
383 | /* Read host total and free values */ |
384 | f = fopen_cached("/proc/meminfo", "re", &fopen_cache); | |
385 | if (!f) | |
386 | return 0; | |
1f5596dd | 387 | |
3ce66074 SG |
388 | while (getline(&line, &linelen, f) != -1) { |
389 | if (startswith(line, "SwapTotal:")) | |
390 | sscanf(line, "SwapTotal: %8" PRIu64 " kB", &hostswtotal); | |
391 | else if (startswith(line, "SwapFree:")) | |
392 | sscanf(line, "SwapFree: %8" PRIu64 " kB", &hostswfree); | |
393 | } | |
1f5596dd | 394 | |
3ce66074 SG |
395 | if (wants_swap) { |
396 | /* The total amount of swap is always reported to be the | |
397 | lesser of the RAM+SWAP limit or the SWAP device size. | |
398 | This is because the kernel can swap as much as it | |
399 | wants and not only up to swtotal. */ | |
400 | swtotal = memlimit / 1024 + swtotal; | |
401 | if (hostswtotal < swtotal) { | |
402 | swtotal = hostswtotal; | |
1f5596dd | 403 | } |
1f5596dd | 404 | |
3ce66074 SG |
405 | /* When swappiness is 0, pretend we can't swap. */ |
406 | if (memswpriority == 0) { | |
407 | swtotal = swusage; | |
408 | } | |
362d1193 SG |
409 | } |
410 | ||
b7b018d0 | 411 | if (swtotal > 0) { |
1f5596dd | 412 | l = snprintf(d->buf + total_len, d->size - total_len, |
e9712933 | 413 | "none%*svirtual\t\t%" PRIu64 "\t%" PRIu64 "\t0\n", |
7cbfbc74 | 414 | 36, " ", swtotal, swusage); |
1f5596dd CB |
415 | total_len += l; |
416 | } | |
417 | ||
f75d5b75 CB |
418 | if (total_len < 0 || l < 0) |
419 | return log_error(0, "Failed writing to cache"); | |
1f5596dd CB |
420 | |
421 | d->cached = 1; | |
422 | d->size = (int)total_len; | |
423 | ||
3cf1e562 | 424 | if ((size_t)total_len > size) |
f75d5b75 | 425 | total_len = size; |
1f5596dd | 426 | memcpy(buf, d->buf, total_len); |
f75d5b75 | 427 | |
1f5596dd CB |
428 | return total_len; |
429 | } | |
430 | ||
/*
 * Extract one counter from a blkio-style statistics blob.
 *
 * @str:    NUL-terminated text made of lines "<major>:<minor> <iotype> <value>";
 *          may be NULL, which is treated like an empty blob
 * @major:  device major number to look for
 * @minor:  device minor number to look for
 * @iotype: operation name to look for, e.g. "Read"
 * @v:      receives the value of the first matching line, or 0 when no line
 *          matches or the value cannot be parsed
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor,
			       char *iotype, uint64_t *v)
{
	char key[32];
	int keylen;

	*v = 0;

	/* The caller's cgroup read may have failed and left the blob NULL. */
	if (!str)
		return;

	keylen = snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	if (keylen < 0 || (size_t)keylen >= sizeof(key))
		return;

	while (*str) {
		if (strncmp(str, key, (size_t)keylen) == 0) {
			/* SCNu64 matches uint64_t on every ABI, "%lu" does not. */
			if (sscanf(str + keylen, "%" SCNu64, v) != 1)
				*v = 0;
			return;
		}

		/* Only line starts are candidates: skip to the next line. */
		str = strchr(str, '\n');
		if (!str)
			return;
		str++;
	}
}
454 | ||
/*
 * One row of diskstats output. The number in each comment is the column the
 * member corresponds to.
 */
struct lxcfs_diskstats {
	/* device identity */
	unsigned int major;		/*  1 - major number */
	unsigned int minor;		/*  2 - minor number */
	char dev_name[72];		/*  3 - device name */

	/* reads */
	uint64_t read;			/*  4 - reads completed successfully */
	uint64_t read_merged;		/*  5 - reads merged */
	uint64_t read_sectors;		/*  6 - sectors read */
	uint64_t read_ticks;		/*  7 - time spent reading (ms) */

	/* writes */
	uint64_t write;			/*  8 - writes completed */
	uint64_t write_merged;		/*  9 - writes merged */
	uint64_t write_sectors;		/* 10 - sectors written */
	uint64_t write_ticks;		/* 11 - time spent writing (ms) */

	/* queue */
	uint64_t ios_pgr;		/* 12 - I/Os currently in progress */
	uint64_t total_ticks;		/* 13 - time spent doing I/Os (ms) */
	uint64_t rq_ticks;		/* 14 - weighted time spent doing I/Os (ms) */

	/* discards */
	uint64_t discard;		/* 15 - discards completed successfully (4.18+) */
	uint64_t discard_merged;	/* 16 - discards merged (4.18+) */
	uint64_t discard_sectors;	/* 17 - sectors discarded (4.18+) */
	uint64_t discard_ticks;		/* 18 - time spent discarding (4.18+) */
};
475 | ||
1f5596dd CB |
476 | static int proc_diskstats_read(char *buf, size_t size, off_t offset, |
477 | struct fuse_file_info *fi) | |
478 | { | |
479 | __do_free char *cg = NULL, *io_serviced_str = NULL, | |
480 | *io_merged_str = NULL, *io_service_bytes_str = NULL, | |
481 | *io_wait_time_str = NULL, *io_service_time_str = NULL, | |
482 | *line = NULL; | |
757a63e7 | 483 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
484 | __do_fclose FILE *f = NULL; |
485 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 486 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
998cdfc9 CB |
487 | struct lxcfs_diskstats stats = {}; |
488 | /* helper fields */ | |
489 | uint64_t read_service_time, write_service_time, discard_service_time, read_wait_time, | |
490 | write_wait_time, discard_wait_time; | |
1f5596dd CB |
491 | char *cache = d->buf; |
492 | size_t cache_size = d->buflen; | |
493 | size_t linelen = 0, total_len = 0; | |
1f5596dd CB |
494 | int i = 0; |
495 | int ret; | |
1f5596dd | 496 | |
cbfc55fd | 497 | if (offset) { |
3cf1e562 | 498 | size_t left; |
1f5596dd CB |
499 | |
500 | if (offset > d->size) | |
501 | return -EINVAL; | |
502 | ||
503 | if (!d->cached) | |
504 | return 0; | |
505 | ||
506 | left = d->size - offset; | |
507 | total_len = left > size ? size: left; | |
508 | memcpy(buf, cache + offset, total_len); | |
509 | ||
510 | return total_len; | |
511 | } | |
512 | ||
513 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
514 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
515 | initpid = fc->pid; | |
a9f0d623 | 516 | |
1f5596dd CB |
517 | cg = get_pid_cgroup(initpid, "blkio"); |
518 | if (!cg) | |
519 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
520 | prune_init_slice(cg); | |
521 | ||
522 | ret = cgroup_ops->get_io_serviced(cgroup_ops, cg, &io_serviced_str); | |
523 | if (ret < 0) { | |
524 | if (ret == -EOPNOTSUPP) | |
525 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
526 | } | |
527 | ||
528 | ret = cgroup_ops->get_io_merged(cgroup_ops, cg, &io_merged_str); | |
529 | if (ret < 0) { | |
530 | if (ret == -EOPNOTSUPP) | |
531 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
532 | } | |
533 | ||
534 | ret = cgroup_ops->get_io_service_bytes(cgroup_ops, cg, &io_service_bytes_str); | |
535 | if (ret < 0) { | |
536 | if (ret == -EOPNOTSUPP) | |
537 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
538 | } | |
539 | ||
540 | ret = cgroup_ops->get_io_wait_time(cgroup_ops, cg, &io_wait_time_str); | |
541 | if (ret < 0) { | |
542 | if (ret == -EOPNOTSUPP) | |
543 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
544 | } | |
545 | ||
546 | ret = cgroup_ops->get_io_service_time(cgroup_ops, cg, &io_service_time_str); | |
547 | if (ret < 0) { | |
548 | if (ret == -EOPNOTSUPP) | |
549 | return read_file_fuse("/proc/diskstats", buf, size, d); | |
550 | } | |
551 | ||
757a63e7 | 552 | f = fopen_cached("/proc/diskstats", "re", &fopen_cache); |
1f5596dd CB |
553 | if (!f) |
554 | return 0; | |
555 | ||
556 | while (getline(&line, &linelen, f) != -1) { | |
557 | ssize_t l; | |
558 | char lbuf[256]; | |
559 | ||
998cdfc9 | 560 | i = sscanf(line, "%u %u %71s", &stats.major, &stats.minor, stats.dev_name); |
1f5596dd CB |
561 | if (i != 3) |
562 | continue; | |
563 | ||
998cdfc9 CB |
564 | get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Read", &stats.read); |
565 | get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Write", &stats.write); | |
566 | get_blkio_io_value(io_serviced_str, stats.major, stats.minor, "Discard", &stats.discard); | |
567 | ||
568 | get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Read", &stats.read_merged); | |
569 | get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Write", &stats.write_merged); | |
570 | get_blkio_io_value(io_merged_str, stats.major, stats.minor, "Discard", &stats.discard_merged); | |
571 | ||
572 | get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Read", &stats.read_sectors); | |
573 | stats.read_sectors = stats.read_sectors / 512; | |
574 | get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Write", &stats.write_sectors); | |
575 | stats.write_sectors = stats.write_sectors / 512; | |
576 | get_blkio_io_value(io_service_bytes_str, stats.major, stats.minor, "Discard", &stats.discard_sectors); | |
577 | stats.discard_sectors = stats.discard_sectors / 512; | |
578 | ||
579 | get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Read", &read_service_time); | |
580 | read_service_time = read_service_time / 1000000; | |
581 | get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Read", &read_wait_time); | |
582 | read_wait_time = read_wait_time / 1000000; | |
583 | stats.read_ticks = read_service_time + read_wait_time; | |
584 | ||
585 | get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Write", &write_service_time); | |
586 | write_service_time = write_service_time / 1000000; | |
587 | get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Write", &write_wait_time); | |
588 | write_wait_time = write_wait_time / 1000000; | |
589 | stats.write_ticks = write_service_time + write_wait_time; | |
590 | ||
591 | get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Discard", &discard_service_time); | |
592 | discard_service_time = discard_service_time / 1000000; | |
593 | get_blkio_io_value(io_wait_time_str, stats.major, stats.minor, "Discard", &discard_wait_time); | |
594 | discard_wait_time = discard_wait_time / 1000000; | |
595 | stats.discard_ticks = discard_service_time + discard_wait_time; | |
596 | ||
597 | get_blkio_io_value(io_service_time_str, stats.major, stats.minor, "Total", &stats.total_ticks); | |
598 | stats.total_ticks = stats.total_ticks / 1000000; | |
1f5596dd CB |
599 | |
600 | memset(lbuf, 0, 256); | |
998cdfc9 CB |
601 | if (stats.read || stats.write || stats.read_merged || stats.write_merged || |
602 | stats.read_sectors || stats.write_sectors || stats.read_ticks || | |
603 | stats.write_ticks || stats.ios_pgr || stats.total_ticks || stats.rq_ticks || | |
604 | stats.discard_merged || stats.discard_sectors || stats.discard_ticks) | |
605 | snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", | |
606 | stats.major, | |
607 | stats.minor, | |
608 | stats.dev_name, | |
609 | stats.read, | |
610 | stats.read_merged, | |
611 | stats.read_sectors, | |
612 | stats.read_ticks, | |
613 | stats.write, | |
614 | stats.write_merged, | |
615 | stats.write_sectors, | |
616 | stats.write_ticks, | |
617 | stats.ios_pgr, | |
618 | stats.total_ticks, | |
619 | stats.rq_ticks, | |
620 | stats.discard_merged, | |
621 | stats.discard_sectors, | |
622 | stats.discard_ticks); | |
1f5596dd CB |
623 | else |
624 | continue; | |
625 | ||
626 | l = snprintf(cache, cache_size, "%s", lbuf); | |
f75d5b75 CB |
627 | if (l < 0) |
628 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 629 | if ((size_t)l >= cache_size) |
f75d5b75 CB |
630 | return log_error(0, "Write to cache was truncated"); |
631 | ||
1f5596dd CB |
632 | cache += l; |
633 | cache_size -= l; | |
634 | total_len += l; | |
635 | } | |
636 | ||
637 | d->cached = 1; | |
638 | d->size = total_len; | |
cbfc55fd CB |
639 | if (total_len > size) |
640 | total_len = size; | |
1f5596dd CB |
641 | memcpy(buf, d->buf, total_len); |
642 | ||
643 | return total_len; | |
644 | } | |
645 | ||
#ifdef RELOADTEST
/*
 * Only built with RELOADTEST: tries to create the marker file
 * /tmp/lxcfs-iwashere. The outcome of mknod() is deliberately ignored.
 */
static inline void iwashere(void)
{
	(void)mknod("/tmp/lxcfs-iwashere", S_IFREG, 0644);
}
#endif
652 | ||
c9c93749 CB |
653 | /* |
654 | * This function retrieves the busy time of a group of tasks by looking at | |
1f5596dd CB |
655 | * cpuacct.usage. Unfortunately, this only makes sense when the container has |
656 | * been given it's own cpuacct cgroup. If not, this function will take the busy | |
657 | * time of all other taks that do not actually belong to the container into | |
658 | * account as well. If someone has a clever solution for this please send a | |
659 | * patch! | |
660 | */ | |
661 | static double get_reaper_busy(pid_t task) | |
662 | { | |
663 | __do_free char *cgroup = NULL, *usage_str = NULL; | |
e9712933 | 664 | uint64_t usage = 0; |
1f5596dd CB |
665 | pid_t initpid; |
666 | ||
667 | initpid = lookup_initpid_in_store(task); | |
668 | if (initpid <= 0) | |
669 | return 0; | |
670 | ||
671 | cgroup = get_pid_cgroup(initpid, "cpuacct"); | |
672 | if (!cgroup) | |
673 | return 0; | |
674 | prune_init_slice(cgroup); | |
c9c93749 | 675 | |
e9712933 | 676 | if (!cgroup_ops->get(cgroup_ops, "cpuacct", cgroup, "cpuacct.usage", &usage_str)) |
1f5596dd CB |
677 | return 0; |
678 | ||
c83158f2 CB |
679 | if (safe_uint64(usage_str, &usage, 10) < 0) |
680 | lxcfs_error("Failed to convert usage %s", usage_str); | |
681 | ||
1f5596dd CB |
682 | return ((double)usage / 1000000000); |
683 | } | |
684 | ||
685 | static uint64_t get_reaper_start_time(pid_t pid) | |
686 | { | |
757a63e7 | 687 | __do_free void *fopen_cache = NULL; |
12a60884 | 688 | __do_fclose FILE *f = NULL; |
1f5596dd | 689 | int ret; |
1f5596dd | 690 | uint64_t starttime; |
c9c93749 CB |
691 | char path[STRLITERALLEN("/proc/") + LXCFS_NUMSTRLEN64 + |
692 | STRLITERALLEN("/stat") + 1]; | |
1f5596dd CB |
693 | pid_t qpid; |
694 | ||
695 | qpid = lookup_initpid_in_store(pid); | |
c9c93749 CB |
696 | if (qpid <= 0) |
697 | return ret_errno(EINVAL); | |
1f5596dd | 698 | |
c9c93749 CB |
699 | ret = snprintf(path, sizeof(path), "/proc/%d/stat", qpid); |
700 | if (ret < 0 || (size_t)ret >= sizeof(path)) | |
701 | return ret_errno(EINVAL); | |
1f5596dd | 702 | |
757a63e7 | 703 | f = fopen_cached(path, "re", &fopen_cache); |
c9c93749 CB |
704 | if (!f) |
705 | return ret_errno(EINVAL); | |
1f5596dd CB |
706 | |
707 | /* Note that the *scanf() argument supression requires that length | |
708 | * modifiers such as "l" are omitted. Otherwise some compilers will yell | |
709 | * at us. It's like telling someone you're not married and then asking | |
710 | * if you can bring your wife to the party. | |
711 | */ | |
712 | ret = fscanf(f, "%*d " /* (1) pid %d */ | |
713 | "%*s " /* (2) comm %s */ | |
714 | "%*c " /* (3) state %c */ | |
715 | "%*d " /* (4) ppid %d */ | |
716 | "%*d " /* (5) pgrp %d */ | |
717 | "%*d " /* (6) session %d */ | |
718 | "%*d " /* (7) tty_nr %d */ | |
719 | "%*d " /* (8) tpgid %d */ | |
720 | "%*u " /* (9) flags %u */ | |
721 | "%*u " /* (10) minflt %lu */ | |
722 | "%*u " /* (11) cminflt %lu */ | |
723 | "%*u " /* (12) majflt %lu */ | |
724 | "%*u " /* (13) cmajflt %lu */ | |
725 | "%*u " /* (14) utime %lu */ | |
726 | "%*u " /* (15) stime %lu */ | |
727 | "%*d " /* (16) cutime %ld */ | |
728 | "%*d " /* (17) cstime %ld */ | |
729 | "%*d " /* (18) priority %ld */ | |
730 | "%*d " /* (19) nice %ld */ | |
731 | "%*d " /* (20) num_threads %ld */ | |
732 | "%*d " /* (21) itrealvalue %ld */ | |
733 | "%" PRIu64, /* (22) starttime %llu */ | |
734 | &starttime); | |
12a60884 | 735 | if (ret != 1) |
c9c93749 | 736 | return ret_errno(EINVAL); |
1f5596dd | 737 | |
12a60884 | 738 | return ret_set_errno(starttime, 0); |
1f5596dd CB |
739 | } |
740 | ||
/*
 * Start time of @pid's reaper converted from clock ticks to seconds.
 *
 * Returns the start time in seconds, or 0 when the start time or the number
 * of clock ticks per second cannot be determined.
 */
static double get_reaper_start_time_in_sec(pid_t pid)
{
	uint64_t clockticks;
	long ticks_per_sec;

	clockticks = get_reaper_start_time(pid);
	/*
	 * clockticks is unsigned, so "<= 0" would only catch 0. A negative
	 * errno value returned through the unsigned type shows up as a value
	 * above INT64_MAX; treat that as a failure as well.
	 */
	if (clockticks == 0 || clockticks > INT64_MAX)
		return log_debug(0, "Failed to retrieve start time of pid %d", pid);

	ticks_per_sec = sysconf(_SC_CLK_TCK);
	/* 0 is rejected too: it would turn the division below into infinity. */
	if (ticks_per_sec <= 0)
		return log_debug(0, "Failed to determine number of clock ticks in a second");

	return (double)clockticks / (double)ticks_per_sec;
}
759 | ||
760 | static double get_reaper_age(pid_t pid) | |
761 | { | |
762 | uint64_t uptime_ms; | |
763 | double procstart, procage; | |
764 | ||
c9c93749 CB |
765 | /* |
766 | * We need to substract the time the process has started since system | |
1f5596dd CB |
767 | * boot minus the time when the system has started to get the actual |
768 | * reaper age. | |
769 | */ | |
770 | procstart = get_reaper_start_time_in_sec(pid); | |
771 | procage = procstart; | |
772 | if (procstart > 0) { | |
773 | int ret; | |
774 | struct timespec spec; | |
775 | ||
776 | ret = clock_gettime(CLOCK_BOOTTIME, &spec); | |
777 | if (ret < 0) | |
778 | return 0; | |
779 | ||
1f5596dd CB |
780 | uptime_ms = (spec.tv_sec * 1000) + (spec.tv_nsec * 1e-6); |
781 | procage = (uptime_ms - (procstart * 1000)) / 1000; | |
782 | } | |
783 | ||
784 | return procage; | |
785 | } | |
786 | ||
787 | /* | |
788 | * We read /proc/uptime and reuse its second field. | |
789 | * For the first field, we use the mtime for the reaper for | |
790 | * the calling pid as returned by getreaperage | |
791 | */ | |
792 | static int proc_uptime_read(char *buf, size_t size, off_t offset, | |
793 | struct fuse_file_info *fi) | |
794 | { | |
795 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 796 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd | 797 | char *cache = d->buf; |
d7718002 | 798 | ssize_t total_len = 0, ret = 0; |
ea725aba | 799 | double busytime, idletime, reaperage; |
1f5596dd | 800 | |
b1ef0dde | 801 | #ifdef RELOADTEST |
1f5596dd CB |
802 | iwashere(); |
803 | #endif | |
804 | ||
f75d5b75 | 805 | if (offset) { |
3cf1e562 | 806 | size_t left; |
f75d5b75 | 807 | |
1f5596dd CB |
808 | if (offset > d->size) |
809 | return -EINVAL; | |
f75d5b75 | 810 | |
d7718002 CB |
811 | if (!d->cached) |
812 | return 0; | |
813 | ||
f75d5b75 CB |
814 | left = d->size - offset; |
815 | total_len = left > size ? size : left; | |
1f5596dd | 816 | memcpy(buf, cache + offset, total_len); |
f75d5b75 | 817 | |
1f5596dd CB |
818 | return total_len; |
819 | } | |
820 | ||
821 | reaperage = get_reaper_age(fc->pid); | |
f75d5b75 CB |
822 | /* |
823 | * To understand why this is done, please read the comment to the | |
1f5596dd CB |
824 | * get_reaper_busy() function. |
825 | */ | |
826 | idletime = reaperage; | |
ea725aba | 827 | busytime = get_reaper_busy(fc->pid); |
1f5596dd CB |
828 | if (reaperage >= busytime) |
829 | idletime = reaperage - busytime; | |
830 | ||
d7718002 CB |
831 | ret = snprintf(d->buf, d->buflen, "%.2lf %.2lf\n", reaperage, idletime); |
832 | if (ret < 0 || ret >= d->buflen) | |
c9c93749 | 833 | return read_file_fuse("/proc/uptime", buf, size, d); |
d7718002 | 834 | total_len = ret; |
1f5596dd | 835 | |
1f5596dd | 836 | d->cached = 1; |
d7718002 | 837 | d->size = total_len; |
3cf1e562 | 838 | if ((size_t)total_len > size) |
f75d5b75 | 839 | total_len = size; |
1f5596dd | 840 | memcpy(buf, d->buf, total_len); |
c9c93749 | 841 | |
1f5596dd CB |
842 | return total_len; |
843 | } | |
844 | ||
845 | #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2) | |
846 | static int proc_stat_read(char *buf, size_t size, off_t offset, | |
847 | struct fuse_file_info *fi) | |
848 | { | |
849 | __do_free char *cg = NULL, *cpuset = NULL, *line = NULL; | |
757a63e7 | 850 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
851 | __do_free struct cpuacct_usage *cg_cpu_usage = NULL; |
852 | __do_fclose FILE *f = NULL; | |
853 | struct fuse_context *fc = fuse_get_context(); | |
8044f626 | 854 | struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data; |
99b183fb | 855 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
856 | size_t linelen = 0, total_len = 0; |
857 | int curcpu = -1; /* cpu numbering starts at 0 */ | |
858 | int physcpu = 0; | |
1ba088ae CB |
859 | uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, |
860 | softirq = 0, steal = 0, guest = 0, guest_nice = 0; | |
861 | uint64_t user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, | |
862 | iowait_sum = 0, irq_sum = 0, softirq_sum = 0, steal_sum = 0, | |
863 | guest_sum = 0, guest_nice_sum = 0; | |
1f5596dd CB |
864 | char cpuall[CPUALL_MAX_SIZE]; |
865 | /* reserve for cpu all */ | |
866 | char *cache = d->buf + CPUALL_MAX_SIZE; | |
867 | size_t cache_size = d->buflen - CPUALL_MAX_SIZE; | |
868 | int cg_cpu_usage_size = 0; | |
869 | ||
7b367480 | 870 | if (offset) { |
3cf1e562 | 871 | size_t left; |
7b367480 | 872 | |
1f5596dd CB |
873 | if (offset > d->size) |
874 | return -EINVAL; | |
7b367480 | 875 | |
1f5596dd CB |
876 | if (!d->cached) |
877 | return 0; | |
7b367480 CB |
878 | |
879 | left = d->size - offset; | |
880 | total_len = left > size ? size : left; | |
1f5596dd | 881 | memcpy(buf, d->buf + offset, total_len); |
7b367480 | 882 | |
1f5596dd CB |
883 | return total_len; |
884 | } | |
885 | ||
886 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
a9f0d623 | 887 | if (initpid <= 1 || is_shared_pidns(initpid)) |
1f5596dd CB |
888 | initpid = fc->pid; |
889 | ||
890 | /* | |
891 | * when container run with host pid namespace initpid == 1, cgroup will "/" | |
892 | * we should return host os's /proc contents. | |
893 | * in some case cpuacct_usage.all in "/" will larger then /proc/stat | |
894 | */ | |
605e157d | 895 | if (initpid == 1) |
6300e6c6 | 896 | return read_file_fuse("/proc/stat", buf, size, d); |
1f5596dd CB |
897 | |
898 | cg = get_pid_cgroup(initpid, "cpuset"); | |
1f5596dd CB |
899 | if (!cg) |
900 | return read_file_fuse("/proc/stat", buf, size, d); | |
901 | prune_init_slice(cg); | |
902 | ||
903 | cpuset = get_cpuset(cg); | |
904 | if (!cpuset) | |
905 | return 0; | |
906 | ||
77711d7a CB |
907 | f = fopen_cached("/proc/stat", "re", &fopen_cache); |
908 | if (!f) | |
909 | return 0; | |
910 | ||
1494771e CB |
911 | /* Skip first system cpu line. */ |
912 | if (getline(&line, &linelen, f) < 0) | |
913 | return log_error(0, "proc_stat_read read first line failed"); | |
914 | ||
1f5596dd CB |
915 | /* |
916 | * Read cpuacct.usage_all for all CPUs. | |
917 | * If the cpuacct cgroup is present, it is used to calculate the container's | |
918 | * CPU usage. If not, values from the host's /proc/stat are used. | |
919 | */ | |
f9434b9a CB |
920 | if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage, &cg_cpu_usage_size) == 0) { |
921 | if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) { | |
922 | total_len = cpuview_proc_stat(cg, cpuset, cg_cpu_usage, | |
923 | cg_cpu_usage_size, f, | |
924 | d->buf, d->buflen); | |
925 | goto out; | |
926 | } | |
927 | } else { | |
928 | lxcfs_v("proc_stat_read failed to read from cpuacct, falling back to the host's /proc/stat"); | |
929 | } | |
1f5596dd | 930 | |
1f5596dd CB |
931 | while (getline(&line, &linelen, f) != -1) { |
932 | ssize_t l; | |
933 | char cpu_char[10]; /* That's a lot of cores */ | |
934 | char *c; | |
935 | uint64_t all_used, cg_used, new_idle; | |
936 | int ret; | |
937 | ||
938 | if (strlen(line) == 0) | |
939 | continue; | |
940 | if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) { | |
941 | /* not a ^cpuN line containing a number N, just print it */ | |
942 | l = snprintf(cache, cache_size, "%s", line); | |
f75d5b75 CB |
943 | if (l < 0) |
944 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 945 | if ((size_t)l >= cache_size) |
f75d5b75 CB |
946 | return log_error(0, "Write to cache was truncated"); |
947 | ||
1f5596dd CB |
948 | cache += l; |
949 | cache_size -= l; | |
950 | total_len += l; | |
f75d5b75 | 951 | |
1f5596dd CB |
952 | continue; |
953 | } | |
954 | ||
955 | if (sscanf(cpu_char, "%d", &physcpu) != 1) | |
956 | continue; | |
f75d5b75 | 957 | |
1f5596dd CB |
958 | if (!cpu_in_cpuset(physcpu, cpuset)) |
959 | continue; | |
f75d5b75 | 960 | |
2b8eff1d | 961 | curcpu++; |
1f5596dd CB |
962 | |
963 | ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", | |
964 | &user, | |
965 | &nice, | |
966 | &system, | |
967 | &idle, | |
968 | &iowait, | |
969 | &irq, | |
970 | &softirq, | |
971 | &steal, | |
972 | &guest, | |
973 | &guest_nice); | |
1f5596dd CB |
974 | if (ret != 10 || !cg_cpu_usage) { |
975 | c = strchr(line, ' '); | |
976 | if (!c) | |
977 | continue; | |
1f5596dd | 978 | |
f75d5b75 CB |
979 | l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c); |
980 | if (l < 0) | |
981 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 982 | if ((size_t)l >= cache_size) |
f75d5b75 | 983 | return log_error(0, "Write to cache was truncated"); |
1f5596dd CB |
984 | |
985 | cache += l; | |
986 | cache_size -= l; | |
987 | total_len += l; | |
988 | ||
989 | if (ret != 10) | |
990 | continue; | |
991 | } | |
992 | ||
993 | if (cg_cpu_usage) { | |
994 | if (physcpu >= cg_cpu_usage_size) | |
995 | break; | |
996 | ||
997 | all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice; | |
998 | cg_used = cg_cpu_usage[physcpu].user + cg_cpu_usage[physcpu].system; | |
999 | ||
1000 | if (all_used >= cg_used) { | |
1001 | new_idle = idle + (all_used - cg_used); | |
1f5596dd | 1002 | } else { |
1e3aa115 | 1003 | lxcfs_debug("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time", |
2b8eff1d | 1004 | curcpu, cg, all_used, cg_used); |
1f5596dd CB |
1005 | new_idle = idle; |
1006 | } | |
1007 | ||
2b8eff1d CB |
1008 | l = snprintf(cache, cache_size, |
1009 | "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n", | |
1010 | curcpu, cg_cpu_usage[physcpu].user, | |
1011 | cg_cpu_usage[physcpu].system, new_idle); | |
f75d5b75 CB |
1012 | if (l < 0) |
1013 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 1014 | if ((size_t)l >= cache_size) |
f75d5b75 | 1015 | return log_error(0, "Write to cache was truncated"); |
1f5596dd CB |
1016 | |
1017 | cache += l; | |
1018 | cache_size -= l; | |
1019 | total_len += l; | |
1020 | ||
1021 | user_sum += cg_cpu_usage[physcpu].user; | |
1022 | system_sum += cg_cpu_usage[physcpu].system; | |
1023 | idle_sum += new_idle; | |
1f5596dd CB |
1024 | } else { |
1025 | user_sum += user; | |
1026 | nice_sum += nice; | |
1027 | system_sum += system; | |
1028 | idle_sum += idle; | |
1029 | iowait_sum += iowait; | |
1030 | irq_sum += irq; | |
1031 | softirq_sum += softirq; | |
1032 | steal_sum += steal; | |
1033 | guest_sum += guest; | |
1034 | guest_nice_sum += guest_nice; | |
1035 | } | |
1036 | } | |
1037 | ||
1038 | cache = d->buf; | |
1039 | ||
1040 | int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "cpu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", | |
1041 | user_sum, | |
1042 | nice_sum, | |
1043 | system_sum, | |
1044 | idle_sum, | |
1045 | iowait_sum, | |
1046 | irq_sum, | |
1047 | softirq_sum, | |
1048 | steal_sum, | |
1049 | guest_sum, | |
1050 | guest_nice_sum); | |
1051 | if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE) { | |
1052 | memcpy(cache, cpuall, cpuall_len); | |
1053 | cache += cpuall_len; | |
1054 | } else { | |
1055 | /* shouldn't happen */ | |
f75d5b75 | 1056 | lxcfs_error("proc_stat_read copy cpuall failed, cpuall_len=%d", cpuall_len); |
1f5596dd CB |
1057 | cpuall_len = 0; |
1058 | } | |
1059 | ||
1060 | memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len); | |
1061 | total_len += cpuall_len; | |
1062 | ||
1063 | out: | |
1064 | d->cached = 1; | |
1065 | d->size = total_len; | |
1066 | if (total_len > size) | |
1067 | total_len = size; | |
1068 | ||
1069 | memcpy(buf, d->buf, total_len); | |
1070 | return total_len; | |
1071 | } | |
1072 | ||
1073 | /* Note that "memory.stat" in cgroup2 is hierarchical by default. */ | |
acff9786 | 1074 | static bool cgroup_parse_memory_stat(const char *cgroup, struct memory_stat *mstat) |
1f5596dd | 1075 | { |
05b7a16d | 1076 | __do_close int fd = -EBADF; |
acff9786 CB |
1077 | __do_fclose FILE *f = NULL; |
1078 | __do_free char *line = NULL; | |
28519477 | 1079 | __do_free void *fdopen_cache = NULL; |
acff9786 CB |
1080 | bool unified; |
1081 | size_t len = 0; | |
1082 | ssize_t linelen; | |
1f5596dd | 1083 | |
acff9786 CB |
1084 | fd = cgroup_ops->get_memory_stats_fd(cgroup_ops, cgroup); |
1085 | if (fd < 0) | |
1086 | return false; | |
1087 | ||
28519477 | 1088 | f = fdopen_cached(fd, "re", &fdopen_cache); |
acff9786 CB |
1089 | if (!f) |
1090 | return false; | |
acff9786 CB |
1091 | |
1092 | unified = pure_unified_layout(cgroup_ops); | |
1093 | while ((linelen = getline(&line, &len, f)) != -1) { | |
1094 | if (!unified && startswith(line, "hierarchical_memory_limit")) { | |
1095 | sscanf(line, "hierarchical_memory_limit %" PRIu64, &(mstat->hierarchical_memory_limit)); | |
1096 | } else if (!unified && startswith(line, "hierarchical_memsw_limit")) { | |
1097 | sscanf(line, "hierarchical_memsw_limit %" PRIu64, &(mstat->hierarchical_memsw_limit)); | |
91d63a9e IM |
1098 | } else if (startswith(line, unified ? "file" :"total_cache")) { |
1099 | sscanf(line, unified ? "file %" PRIu64 : "total_cache %" PRIu64, &(mstat->total_cache)); | |
acff9786 CB |
1100 | } else if (!unified && startswith(line, "total_rss")) { |
1101 | sscanf(line, "total_rss %" PRIu64, &(mstat->total_rss)); | |
1102 | } else if (!unified && startswith(line, "total_rss_huge")) { | |
1103 | sscanf(line, "total_rss_huge %" PRIu64, &(mstat->total_rss_huge)); | |
1104 | } else if (startswith(line, unified ? "shmem" : "total_shmem")) { | |
1105 | sscanf(line, unified ? "shmem %" PRIu64 : "total_shmem %" PRIu64, &(mstat->total_shmem)); | |
1106 | } else if (startswith(line, unified ? "file_mapped" : "total_mapped_file")) { | |
1107 | sscanf(line, unified ? "file_mapped %" PRIu64 : "total_mapped_file %" PRIu64, &(mstat->total_mapped_file)); | |
1108 | } else if (!unified && startswith(line, "total_dirty")) { | |
1109 | sscanf(line, "total_dirty %" PRIu64, &(mstat->total_dirty)); | |
1110 | } else if (!unified && startswith(line, "total_writeback")) { | |
1111 | sscanf(line, "total_writeback %" PRIu64, &(mstat->total_writeback)); | |
1112 | } else if (!unified && startswith(line, "total_swap")) { | |
1113 | sscanf(line, "total_swap %" PRIu64, &(mstat->total_swap)); | |
1114 | } else if (!unified && startswith(line, "total_pgpgin")) { | |
1115 | sscanf(line, "total_pgpgin %" PRIu64, &(mstat->total_pgpgin)); | |
1116 | } else if (!unified && startswith(line, "total_pgpgout")) { | |
1117 | sscanf(line, "total_pgpgout %" PRIu64, &(mstat->total_pgpgout)); | |
1118 | } else if (startswith(line, unified ? "pgfault" : "total_pgfault")) { | |
1119 | sscanf(line, unified ? "pgfault %" PRIu64 : "total_pgfault %" PRIu64, &(mstat->total_pgfault)); | |
1120 | } else if (startswith(line, unified ? "pgmajfault" : "total_pgmajfault")) { | |
1121 | sscanf(line, unified ? "pgmajfault %" PRIu64 : "total_pgmajfault %" PRIu64, &(mstat->total_pgmajfault)); | |
1122 | } else if (startswith(line, unified ? "inactive_anon" : "total_inactive_anon")) { | |
1123 | sscanf(line, unified ? "inactive_anon %" PRIu64 : "total_inactive_anon %" PRIu64, &(mstat->total_inactive_anon)); | |
1124 | } else if (startswith(line, unified ? "active_anon" : "total_active_anon")) { | |
1125 | sscanf(line, unified ? "active_anon %" PRIu64 : "total_active_anon %" PRIu64, &(mstat->total_active_anon)); | |
1126 | } else if (startswith(line, unified ? "inactive_file" : "total_inactive_file")) { | |
1127 | sscanf(line, unified ? "inactive_file %" PRIu64 : "total_inactive_file %" PRIu64, &(mstat->total_inactive_file)); | |
1128 | } else if (startswith(line, unified ? "active_file" : "total_active_file")) { | |
1129 | sscanf(line, unified ? "active_file %" PRIu64 : "total_active_file %" PRIu64, &(mstat->total_active_file)); | |
1130 | } else if (startswith(line, unified ? "unevictable" : "total_unevictable")) { | |
1131 | sscanf(line, unified ? "unevictable %" PRIu64 : "total_unevictable %" PRIu64, &(mstat->total_unevictable)); | |
1f5596dd | 1132 | } |
1f5596dd | 1133 | } |
acff9786 CB |
1134 | |
1135 | return true; | |
1f5596dd CB |
1136 | } |
1137 | ||
1f5596dd CB |
1138 | static int proc_meminfo_read(char *buf, size_t size, off_t offset, |
1139 | struct fuse_file_info *fi) | |
1140 | { | |
63f35cc0 | 1141 | __do_free char *cgroup = NULL, *line = NULL, *memusage_str = NULL, |
362d1193 | 1142 | *memswusage_str = NULL, *memswpriority_str = NULL; |
757a63e7 | 1143 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
1144 | __do_fclose FILE *f = NULL; |
1145 | struct fuse_context *fc = fuse_get_context(); | |
84e184b1 | 1146 | bool wants_swap = lxcfs_has_opt(fuse_get_context()->private_data, LXCFS_SWAP_ON); |
99b183fb | 1147 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
acff9786 | 1148 | uint64_t memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0, |
362d1193 SG |
1149 | hosttotal = 0, swfree = 0, swusage = 0, swtotal = 0, |
1150 | memswpriority = 1; | |
334a14f9 | 1151 | struct memory_stat mstat = {}; |
1f5596dd CB |
1152 | size_t linelen = 0, total_len = 0; |
1153 | char *cache = d->buf; | |
1154 | size_t cache_size = d->buflen; | |
1155 | int ret; | |
1156 | ||
1157 | if (offset) { | |
3cf1e562 | 1158 | size_t left; |
1f5596dd CB |
1159 | |
1160 | if (offset > d->size) | |
1161 | return -EINVAL; | |
1162 | ||
1163 | if (!d->cached) | |
1164 | return 0; | |
1165 | ||
1166 | left = d->size - offset; | |
1167 | total_len = left > size ? size : left; | |
1168 | memcpy(buf, cache + offset, total_len); | |
1169 | ||
1170 | return total_len; | |
1171 | } | |
1172 | ||
1173 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
1174 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
1175 | initpid = fc->pid; | |
1176 | ||
1177 | cgroup = get_pid_cgroup(initpid, "memory"); | |
1178 | if (!cgroup) | |
1179 | return read_file_fuse("/proc/meminfo", buf, size, d); | |
1180 | ||
1181 | prune_init_slice(cgroup); | |
1182 | ||
b7b018d0 | 1183 | /* memory limits */ |
1f5596dd CB |
1184 | ret = cgroup_ops->get_memory_current(cgroup_ops, cgroup, &memusage_str); |
1185 | if (ret < 0) | |
b0f33646 | 1186 | return read_file_fuse("/proc/meminfo", buf, size, d); |
1f5596dd | 1187 | |
b7b018d0 CB |
1188 | if (safe_uint64(memusage_str, &memusage, 10) < 0) |
1189 | lxcfs_error("Failed to convert memusage %s", memusage_str); | |
1190 | ||
acff9786 | 1191 | if (!cgroup_parse_memory_stat(cgroup, &mstat)) |
b0f33646 | 1192 | return read_file_fuse("/proc/meminfo", buf, size, d); |
1f5596dd | 1193 | |
b7b018d0 CB |
1194 | memlimit = get_min_memlimit(cgroup, false); |
1195 | ||
1f5596dd CB |
1196 | /* |
1197 | * Following values are allowed to fail, because swapaccount might be | |
1198 | * turned off for current kernel. | |
1199 | */ | |
2f2080c1 | 1200 | if (wants_swap) { |
b7b018d0 CB |
1201 | memswlimit = get_min_memlimit(cgroup, true); |
1202 | if (memswlimit > 0) { | |
2f2080c1 | 1203 | ret = cgroup_ops->get_memory_swap_current(cgroup_ops, cgroup, &memswusage_str); |
b7b018d0 CB |
1204 | if (ret >= 0 && safe_uint64(memswusage_str, &memswusage, 10) == 0) { |
1205 | if (memlimit > memswlimit) | |
1206 | swtotal = 0; | |
1207 | else | |
1208 | swtotal = (memswlimit - memlimit) / 1024; | |
1209 | if (memusage > memswusage || swtotal == 0) | |
1210 | swusage = 0; | |
1211 | else | |
1212 | swusage = (memswusage - memusage) / 1024; | |
6bfe1016 | 1213 | } |
2f2080c1 | 1214 | } |
362d1193 SG |
1215 | |
1216 | ret = cgroup_ops->get_memory_swappiness(cgroup_ops, cgroup, &memswpriority_str); | |
1217 | if (ret >= 0) | |
1218 | safe_uint64(memswpriority_str, &memswpriority, 10); | |
1f5596dd CB |
1219 | } |
1220 | ||
757a63e7 | 1221 | f = fopen_cached("/proc/meminfo", "re", &fopen_cache); |
1f5596dd | 1222 | if (!f) |
b0f33646 | 1223 | return read_file_fuse("/proc/meminfo", buf, size, d); |
1f5596dd | 1224 | |
b7b018d0 CB |
1225 | memusage /= 1024; |
1226 | memlimit /= 1024; | |
1f5596dd CB |
1227 | while (getline(&line, &linelen, f) != -1) { |
1228 | ssize_t l; | |
1229 | char *printme, lbuf[100]; | |
1230 | ||
1231 | memset(lbuf, 0, 100); | |
1232 | if (startswith(line, "MemTotal:")) { | |
acff9786 | 1233 | sscanf(line+sizeof("MemTotal:")-1, "%" PRIu64, &hosttotal); |
114eb8b8 CB |
1234 | if (memlimit == 0) |
1235 | memlimit = hosttotal; | |
1236 | ||
1f5596dd CB |
1237 | if (hosttotal < memlimit) |
1238 | memlimit = hosttotal; | |
acff9786 | 1239 | snprintf(lbuf, 100, "MemTotal: %8" PRIu64 " kB\n", memlimit); |
1f5596dd CB |
1240 | printme = lbuf; |
1241 | } else if (startswith(line, "MemFree:")) { | |
acff9786 | 1242 | snprintf(lbuf, 100, "MemFree: %8" PRIu64 " kB\n", memlimit - memusage); |
1f5596dd CB |
1243 | printme = lbuf; |
1244 | } else if (startswith(line, "MemAvailable:")) { | |
acff9786 | 1245 | snprintf(lbuf, 100, "MemAvailable: %8" PRIu64 " kB\n", memlimit - memusage + mstat.total_cache / 1024); |
1f5596dd | 1246 | printme = lbuf; |
07c90197 | 1247 | } else if (startswith(line, "SwapTotal:")) { |
b7b018d0 CB |
1248 | if (wants_swap) { |
1249 | uint64_t hostswtotal = 0; | |
1250 | ||
1251 | sscanf(line + STRLITERALLEN("SwapTotal:"), "%" PRIu64, &hostswtotal); | |
1252 | ||
33aa929e SG |
1253 | /* The total amount of swap is always reported to be the |
1254 | lesser of the RAM+SWAP limit or the SWAP device size. | |
1255 | This is because the kernel can swap as much as it | |
1256 | wants and not only up to swtotal. */ | |
1257 | ||
1258 | swtotal = memlimit + swtotal; | |
1259 | if (hostswtotal < swtotal) { | |
b7b018d0 | 1260 | swtotal = hostswtotal; |
b7b018d0 | 1261 | } |
362d1193 SG |
1262 | |
1263 | /* When swappiness is 0, pretend we can't swap. */ | |
1264 | if (memswpriority == 0) { | |
1265 | swtotal = swusage; | |
1266 | } | |
b7b018d0 CB |
1267 | } |
1268 | ||
6bfe1016 | 1269 | snprintf(lbuf, 100, "SwapTotal: %8" PRIu64 " kB\n", swtotal); |
1f5596dd | 1270 | printme = lbuf; |
07c90197 | 1271 | } else if (startswith(line, "SwapFree:")) { |
b7b018d0 | 1272 | if (wants_swap) { |
33aa929e | 1273 | swfree = swtotal - swusage; |
07c90197 | 1274 | } |
b7b018d0 | 1275 | |
6bfe1016 | 1276 | snprintf(lbuf, 100, "SwapFree: %8" PRIu64 " kB\n", swfree); |
1f5596dd CB |
1277 | printme = lbuf; |
1278 | } else if (startswith(line, "Slab:")) { | |
6ddc3c00 | 1279 | snprintf(lbuf, 100, "Slab: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1280 | printme = lbuf; |
1281 | } else if (startswith(line, "Buffers:")) { | |
acff9786 | 1282 | snprintf(lbuf, 100, "Buffers: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1283 | printme = lbuf; |
1284 | } else if (startswith(line, "Cached:")) { | |
acff9786 CB |
1285 | snprintf(lbuf, 100, "Cached: %8" PRIu64 " kB\n", |
1286 | mstat.total_cache / 1024); | |
1f5596dd CB |
1287 | printme = lbuf; |
1288 | } else if (startswith(line, "SwapCached:")) { | |
acff9786 | 1289 | snprintf(lbuf, 100, "SwapCached: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1290 | printme = lbuf; |
1291 | } else if (startswith(line, "Active:")) { | |
acff9786 CB |
1292 | snprintf(lbuf, 100, "Active: %8" PRIu64 " kB\n", |
1293 | (mstat.total_active_anon + | |
1294 | mstat.total_active_file) / | |
1295 | 1024); | |
1f5596dd CB |
1296 | printme = lbuf; |
1297 | } else if (startswith(line, "Inactive:")) { | |
acff9786 CB |
1298 | snprintf(lbuf, 100, "Inactive: %8" PRIu64 " kB\n", |
1299 | (mstat.total_inactive_anon + | |
1300 | mstat.total_inactive_file) / | |
1301 | 1024); | |
1f5596dd | 1302 | printme = lbuf; |
659b0278 | 1303 | } else if (startswith(line, "Active(anon):")) { |
acff9786 CB |
1304 | snprintf(lbuf, 100, "Active(anon): %8" PRIu64 " kB\n", |
1305 | mstat.total_active_anon / 1024); | |
1f5596dd | 1306 | printme = lbuf; |
659b0278 | 1307 | } else if (startswith(line, "Inactive(anon):")) { |
acff9786 CB |
1308 | snprintf(lbuf, 100, "Inactive(anon): %8" PRIu64 " kB\n", |
1309 | mstat.total_inactive_anon / 1024); | |
1f5596dd | 1310 | printme = lbuf; |
659b0278 | 1311 | } else if (startswith(line, "Active(file):")) { |
acff9786 CB |
1312 | snprintf(lbuf, 100, "Active(file): %8" PRIu64 " kB\n", |
1313 | mstat.total_active_file / 1024); | |
1f5596dd | 1314 | printme = lbuf; |
659b0278 | 1315 | } else if (startswith(line, "Inactive(file):")) { |
acff9786 CB |
1316 | snprintf(lbuf, 100, "Inactive(file): %8" PRIu64 " kB\n", |
1317 | mstat.total_inactive_file / 1024); | |
1f5596dd | 1318 | printme = lbuf; |
659b0278 | 1319 | } else if (startswith(line, "Unevictable:")) { |
acff9786 CB |
1320 | snprintf(lbuf, 100, "Unevictable: %8" PRIu64 " kB\n", |
1321 | mstat.total_unevictable / 1024); | |
1322 | printme = lbuf; | |
659b0278 | 1323 | } else if (startswith(line, "Dirty:")) { |
acff9786 CB |
1324 | snprintf(lbuf, 100, "Dirty: %8" PRIu64 " kB\n", |
1325 | mstat.total_dirty / 1024); | |
1326 | printme = lbuf; | |
659b0278 | 1327 | } else if (startswith(line, "Writeback:")) { |
acff9786 CB |
1328 | snprintf(lbuf, 100, "Writeback: %8" PRIu64 " kB\n", |
1329 | mstat.total_writeback / 1024); | |
1330 | printme = lbuf; | |
659b0278 | 1331 | } else if (startswith(line, "AnonPages:")) { |
acff9786 CB |
1332 | snprintf(lbuf, 100, "AnonPages: %8" PRIu64 " kB\n", |
1333 | (mstat.total_active_anon + | |
1334 | mstat.total_inactive_anon - mstat.total_shmem) / | |
1335 | 1024); | |
1336 | printme = lbuf; | |
659b0278 | 1337 | } else if (startswith(line, "Mapped:")) { |
acff9786 CB |
1338 | snprintf(lbuf, 100, "Mapped: %8" PRIu64 " kB\n", |
1339 | mstat.total_mapped_file / 1024); | |
1f5596dd | 1340 | printme = lbuf; |
659b0278 | 1341 | } else if (startswith(line, "SReclaimable:")) { |
acff9786 | 1342 | snprintf(lbuf, 100, "SReclaimable: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd | 1343 | printme = lbuf; |
659b0278 | 1344 | } else if (startswith(line, "SUnreclaim:")) { |
acff9786 | 1345 | snprintf(lbuf, 100, "SUnreclaim: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd CB |
1346 | printme = lbuf; |
1347 | } else if (startswith(line, "Shmem:")) { | |
acff9786 CB |
1348 | snprintf(lbuf, 100, "Shmem: %8" PRIu64 " kB\n", |
1349 | mstat.total_shmem / 1024); | |
1f5596dd | 1350 | printme = lbuf; |
659b0278 | 1351 | } else if (startswith(line, "ShmemHugePages:")) { |
acff9786 | 1352 | snprintf(lbuf, 100, "ShmemHugePages: %8" PRIu64 " kB\n", (uint64_t)0); |
1f5596dd | 1353 | printme = lbuf; |
659b0278 | 1354 | } else if (startswith(line, "ShmemPmdMapped:")) { |
acff9786 CB |
1355 | snprintf(lbuf, 100, "ShmemPmdMapped: %8" PRIu64 " kB\n", (uint64_t)0); |
1356 | printme = lbuf; | |
659b0278 | 1357 | } else if (startswith(line, "AnonHugePages:")) { |
acff9786 CB |
1358 | snprintf(lbuf, 100, "AnonHugePages: %8" PRIu64 " kB\n", |
1359 | mstat.total_rss_huge / 1024); | |
1f5596dd | 1360 | printme = lbuf; |
acff9786 CB |
1361 | } else { |
1362 | printme = line; | |
1363 | } | |
1f5596dd CB |
1364 | |
1365 | l = snprintf(cache, cache_size, "%s", printme); | |
f75d5b75 CB |
1366 | if (l < 0) |
1367 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 1368 | if ((size_t)l >= cache_size) |
f75d5b75 | 1369 | return log_error(0, "Write to cache was truncated"); |
1f5596dd CB |
1370 | |
1371 | cache += l; | |
1372 | cache_size -= l; | |
1373 | total_len += l; | |
1374 | } | |
1375 | ||
1376 | d->cached = 1; | |
1377 | d->size = total_len; | |
f75d5b75 CB |
1378 | if (total_len > size) |
1379 | total_len = size; | |
1f5596dd CB |
1380 | memcpy(buf, d->buf, total_len); |
1381 | ||
1382 | return total_len; | |
1383 | } | |
1384 | ||
6cc153e6 FS |
1385 | static int proc_slabinfo_read(char *buf, size_t size, off_t offset, |
1386 | struct fuse_file_info *fi) | |
1387 | { | |
1388 | __do_free char *cgroup = NULL, *line = NULL; | |
1389 | __do_free void *fopen_cache = NULL; | |
1390 | __do_fclose FILE *f = NULL; | |
1391 | __do_close int fd = -EBADF; | |
1392 | struct fuse_context *fc = fuse_get_context(); | |
1393 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); | |
1394 | size_t linelen = 0, total_len = 0; | |
1395 | char *cache = d->buf; | |
1396 | size_t cache_size = d->buflen; | |
1397 | pid_t initpid; | |
1398 | ||
1399 | if (offset) { | |
3cf1e562 | 1400 | size_t left; |
6cc153e6 FS |
1401 | |
1402 | if (offset > d->size) | |
1403 | return -EINVAL; | |
1404 | ||
1405 | if (!d->cached) | |
1406 | return 0; | |
1407 | ||
1408 | left = d->size - offset; | |
1409 | total_len = left > size ? size : left; | |
1410 | memcpy(buf, cache + offset, total_len); | |
1411 | ||
1412 | return total_len; | |
1413 | } | |
1414 | ||
1415 | initpid = lookup_initpid_in_store(fc->pid); | |
1416 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
1417 | initpid = fc->pid; | |
1418 | ||
1419 | cgroup = get_pid_cgroup(initpid, "memory"); | |
1420 | if (!cgroup) | |
1421 | return read_file_fuse("/proc/slabinfo", buf, size, d); | |
1422 | ||
1423 | prune_init_slice(cgroup); | |
1424 | ||
1425 | fd = cgroup_ops->get_memory_slabinfo_fd(cgroup_ops, cgroup); | |
1426 | if (fd < 0) | |
1427 | return read_file_fuse("/proc/slabinfo", buf, size, d); | |
1428 | ||
1429 | f = fdopen_cached(fd, "re", &fopen_cache); | |
1430 | if (!f) | |
1431 | return read_file_fuse("/proc/slabinfo", buf, size, d); | |
1432 | ||
1433 | while (getline(&line, &linelen, f) != -1) { | |
1434 | ssize_t l = snprintf(cache, cache_size, "%s", line); | |
1435 | if (l < 0) | |
1436 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 1437 | if ((size_t)l >= cache_size) |
6cc153e6 FS |
1438 | return log_error(0, "Write to cache was truncated"); |
1439 | ||
1440 | cache += l; | |
1441 | cache_size -= l; | |
1442 | total_len += l; | |
1443 | } | |
1444 | ||
1445 | d->cached = 1; | |
1446 | d->size = total_len; | |
1447 | if (total_len > size) | |
1448 | total_len = size; | |
1449 | memcpy(buf, d->buf, total_len); | |
1450 | ||
1451 | return total_len; | |
1452 | } | |
1453 | ||
2d7bcab7 CB |
1454 | __lxcfs_fuse_ops int proc_read(const char *path, char *buf, size_t size, |
1455 | off_t offset, struct fuse_file_info *fi) | |
1f5596dd | 1456 | { |
99b183fb | 1457 | struct file_info *f = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
1458 | |
1459 | switch (f->type) { | |
1460 | case LXC_TYPE_PROC_MEMINFO: | |
cbfc55fd CB |
1461 | if (liblxcfs_functional()) |
1462 | return proc_meminfo_read(buf, size, offset, fi); | |
1463 | ||
1464 | return read_file_fuse_with_offset(LXC_TYPE_PROC_MEMINFO_PATH, | |
1465 | buf, size, offset, f); | |
1f5596dd | 1466 | case LXC_TYPE_PROC_CPUINFO: |
cbfc55fd CB |
1467 | if (liblxcfs_functional()) |
1468 | return proc_cpuinfo_read(buf, size, offset, fi); | |
1469 | ||
1470 | return read_file_fuse_with_offset(LXC_TYPE_PROC_CPUINFO_PATH, | |
1471 | buf, size, offset, f); | |
1f5596dd | 1472 | case LXC_TYPE_PROC_UPTIME: |
cbfc55fd CB |
1473 | if (liblxcfs_functional()) |
1474 | return proc_uptime_read(buf, size, offset, fi); | |
1475 | ||
1476 | return read_file_fuse_with_offset(LXC_TYPE_PROC_UPTIME_PATH, | |
1477 | buf, size, offset, f); | |
1f5596dd | 1478 | case LXC_TYPE_PROC_STAT: |
cbfc55fd CB |
1479 | if (liblxcfs_functional()) |
1480 | return proc_stat_read(buf, size, offset, fi); | |
1481 | ||
1482 | return read_file_fuse_with_offset(LXC_TYPE_PROC_STAT_PATH, buf, | |
1483 | size, offset, f); | |
1f5596dd | 1484 | case LXC_TYPE_PROC_DISKSTATS: |
cbfc55fd CB |
1485 | if (liblxcfs_functional()) |
1486 | return proc_diskstats_read(buf, size, offset, fi); | |
1487 | ||
1488 | return read_file_fuse_with_offset(LXC_TYPE_PROC_DISKSTATS_PATH, | |
1489 | buf, size, offset, f); | |
1f5596dd | 1490 | case LXC_TYPE_PROC_SWAPS: |
cbfc55fd CB |
1491 | if (liblxcfs_functional()) |
1492 | return proc_swaps_read(buf, size, offset, fi); | |
1493 | ||
1494 | return read_file_fuse_with_offset(LXC_TYPE_PROC_SWAPS_PATH, buf, | |
1495 | size, offset, f); | |
1f5596dd | 1496 | case LXC_TYPE_PROC_LOADAVG: |
cbfc55fd CB |
1497 | if (liblxcfs_functional()) |
1498 | return proc_loadavg_read(buf, size, offset, fi); | |
1499 | ||
1500 | return read_file_fuse_with_offset(LXC_TYPE_PROC_LOADAVG_PATH, | |
1501 | buf, size, offset, f); | |
6cc153e6 FS |
1502 | case LXC_TYPE_PROC_SLABINFO: |
1503 | if (liblxcfs_functional()) | |
1504 | return proc_slabinfo_read(buf, size, offset, fi); | |
1505 | ||
1506 | return read_file_fuse_with_offset(LXC_TYPE_PROC_SLABINFO_PATH, | |
1507 | buf, size, offset, f); | |
1f5596dd | 1508 | } |
99b183fb CB |
1509 | |
1510 | return -EINVAL; | |
1f5596dd | 1511 | } |