]>
Commit | Line | Data |
---|---|---|
db0463bf | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
1f5596dd | 2 | |
f834b6bf SP |
3 | #include "config.h" |
4 | ||
1f5596dd CB |
5 | #include <dirent.h> |
6 | #include <errno.h> | |
7 | #include <fcntl.h> | |
1f5596dd CB |
8 | #include <inttypes.h> |
9 | #include <libgen.h> | |
10 | #include <pthread.h> | |
11 | #include <sched.h> | |
12 | #include <stdarg.h> | |
13 | #include <stdbool.h> | |
14 | #include <stdint.h> | |
15 | #include <stdio.h> | |
16 | #include <stdlib.h> | |
17 | #include <string.h> | |
18 | #include <time.h> | |
19 | #include <unistd.h> | |
20 | #include <wait.h> | |
21 | #include <linux/magic.h> | |
22 | #include <linux/sched.h> | |
23 | #include <sys/epoll.h> | |
24 | #include <sys/mman.h> | |
25 | #include <sys/mount.h> | |
26 | #include <sys/param.h> | |
27 | #include <sys/socket.h> | |
28 | #include <sys/syscall.h> | |
29 | #include <sys/sysinfo.h> | |
30 | #include <sys/vfs.h> | |
31 | ||
e01afbb7 CB |
32 | #include "proc_cpuview.h" |
33 | ||
1f5596dd | 34 | #include "bindings.h" |
1f5596dd CB |
35 | #include "cgroup_fuse.h" |
36 | #include "cpuset_parse.h" | |
37 | #include "cgroups/cgroup.h" | |
38 | #include "cgroups/cgroup_utils.h" | |
39 | #include "memory_utils.h" | |
4ec5c9da | 40 | #include "proc_loadavg.h" |
1f5596dd CB |
41 | #include "utils.h" |
42 | ||
/*
 * Data for CPU view.
 *
 * One record per cgroup. Records that hash to the same bucket are chained
 * through `next`.
 */
struct cg_proc_stat {
	/* Cgroup path; nodes are looked up by comparing this string. */
	char *cg;

	/* Real usage as read from the host's /proc/stat. */
	struct cpuacct_usage *usage;

	/* Usage stats reported to the container. */
	struct cpuacct_usage *view;

	/* Number of entries allocated in both `usage` and `view`. */
	int cpu_count;

	/* For node manipulation. */
	pthread_mutex_t lock;

	/* Next record in the same bucket, NULL at the end of the chain. */
	struct cg_proc_stat *next;
};
52 | ||
53 | struct cg_proc_stat_head { | |
54 | struct cg_proc_stat *next; | |
55 | time_t lastcheck; | |
56 | ||
57 | /* | |
58 | * For access to the list. Reading can be parallel, pruning is exclusive. | |
59 | */ | |
60 | pthread_rwlock_t lock; | |
61 | }; | |
62 | ||
63 | #define CPUVIEW_HASH_SIZE 100 | |
64 | static struct cg_proc_stat_head *proc_stat_history[CPUVIEW_HASH_SIZE]; | |
65 | ||
b456d40d CB |
66 | static void reset_proc_stat_node(struct cg_proc_stat *node, |
67 | struct cpuacct_usage *usage, int cpu_count) | |
1f5596dd | 68 | { |
1f5596dd CB |
69 | lxcfs_debug("Resetting stat node for %s\n", node->cg); |
70 | memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count); | |
71 | ||
b456d40d | 72 | for (int i = 0; i < cpu_count; i++) { |
1f5596dd CB |
73 | node->view[i].user = 0; |
74 | node->view[i].system = 0; | |
75 | node->view[i].idle = 0; | |
76 | } | |
77 | ||
78 | node->cpu_count = cpu_count; | |
79 | } | |
80 | ||
81 | static bool expand_proc_stat_node(struct cg_proc_stat *node, int cpu_count) | |
82 | { | |
83 | __do_free struct cpuacct_usage *new_usage = NULL, *new_view = NULL; | |
84 | ||
85 | /* Allocate new memory */ | |
82d74a95 | 86 | new_usage = zalloc(sizeof(struct cpuacct_usage) * cpu_count); |
1f5596dd CB |
87 | if (!new_usage) |
88 | return false; | |
89 | ||
82d74a95 | 90 | new_view = zalloc(sizeof(struct cpuacct_usage) * cpu_count); |
1f5596dd CB |
91 | if (!new_view) |
92 | return false; | |
93 | ||
94 | /* Copy existing data & initialize new elements */ | |
95 | for (int i = 0; i < cpu_count; i++) { | |
96 | if (i < node->cpu_count) { | |
82d74a95 CB |
97 | new_usage[i].user = node->usage[i].user; |
98 | new_usage[i].system = node->usage[i].system; | |
99 | new_usage[i].idle = node->usage[i].idle; | |
100 | ||
101 | new_view[i].user = node->view[i].user; | |
102 | new_view[i].system = node->view[i].system; | |
103 | new_view[i].idle = node->view[i].idle; | |
1f5596dd CB |
104 | } |
105 | } | |
106 | ||
107 | free(node->usage); | |
108 | node->usage = move_ptr(new_usage); | |
109 | ||
110 | free(node->view); | |
111 | node->view = move_ptr(new_view); | |
112 | node->cpu_count = cpu_count; | |
113 | ||
114 | return true; | |
115 | } | |
116 | ||
4ec5c9da CB |
117 | static void free_proc_stat_node(struct cg_proc_stat *node) |
118 | { | |
6a4dceb1 CB |
119 | if (node) { |
120 | /* | |
121 | * We're abusing the usage pointer to indicate that | |
122 | * pthread_mutex_init() was successful. Don't judge me. | |
123 | */ | |
124 | if (node->usage) | |
125 | pthread_mutex_destroy(&node->lock); | |
126 | free_disarm(node->cg); | |
127 | free_disarm(node->usage); | |
128 | free_disarm(node->view); | |
129 | free_disarm(node); | |
130 | } | |
4ec5c9da CB |
131 | } |
132 | ||
6a4dceb1 CB |
133 | define_cleanup_function(struct cg_proc_stat *, free_proc_stat_node); |
134 | ||
1f5596dd CB |
135 | static struct cg_proc_stat *add_proc_stat_node(struct cg_proc_stat *new_node) |
136 | { | |
0d129671 CB |
137 | call_cleaner(free_proc_stat_node) struct cg_proc_stat *new = new_node; |
138 | struct cg_proc_stat *rv = new_node; | |
139 | int hash = calc_hash(new->cg) % CPUVIEW_HASH_SIZE; | |
1f5596dd | 140 | struct cg_proc_stat_head *head = proc_stat_history[hash]; |
0d129671 | 141 | struct cg_proc_stat *cur; |
1f5596dd CB |
142 | |
143 | pthread_rwlock_wrlock(&head->lock); | |
144 | ||
145 | if (!head->next) { | |
0d129671 | 146 | head->next = move_ptr(new); |
164acda7 | 147 | goto out_rwlock_unlock; |
1f5596dd CB |
148 | } |
149 | ||
0d129671 | 150 | cur = head->next; |
1f5596dd CB |
151 | |
152 | for (;;) { | |
0d129671 CB |
153 | /* |
154 | * The node to be added is already present in the list, so | |
155 | * free the newly allocated one and return the one we found. | |
156 | */ | |
157 | if (strcmp(cur->cg, new->cg) == 0) { | |
158 | rv = cur; | |
164acda7 | 159 | goto out_rwlock_unlock; |
1f5596dd CB |
160 | } |
161 | ||
0d129671 CB |
162 | /* Keep walking. */ |
163 | if (cur->next) { | |
164 | cur = cur->next; | |
1f5596dd CB |
165 | continue; |
166 | } | |
167 | ||
0d129671 CB |
168 | /* Add new node to end of list. */ |
169 | cur->next = move_ptr(new); | |
164acda7 | 170 | goto out_rwlock_unlock; |
1f5596dd CB |
171 | } |
172 | ||
164acda7 | 173 | out_rwlock_unlock: |
54db3e71 | 174 | pthread_mutex_lock(&rv->lock); |
1f5596dd | 175 | pthread_rwlock_unlock(&head->lock); |
0d129671 | 176 | return move_ptr(rv); |
1f5596dd CB |
177 | } |
178 | ||
6a4dceb1 CB |
179 | static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, |
180 | int cpu_count, const char *cg) | |
1f5596dd | 181 | { |
6a4dceb1 CB |
182 | call_cleaner(free_proc_stat_node) struct cg_proc_stat *node = NULL; |
183 | __do_free struct cpuacct_usage *new_usage = NULL; | |
1f5596dd | 184 | |
6a4dceb1 | 185 | node = zalloc(sizeof(struct cg_proc_stat)); |
1f5596dd | 186 | if (!node) |
6a4dceb1 | 187 | return NULL; |
1f5596dd | 188 | |
6a4dceb1 | 189 | node->cg = strdup(cg); |
1f5596dd | 190 | if (!node->cg) |
6a4dceb1 | 191 | return NULL; |
1f5596dd | 192 | |
6a4dceb1 CB |
193 | new_usage = memdup(usage, sizeof(struct cpuacct_usage) * cpu_count); |
194 | if (!new_usage) | |
195 | return NULL; | |
1f5596dd | 196 | |
6a4dceb1 | 197 | node->view = zalloc(sizeof(struct cpuacct_usage) * cpu_count); |
1f5596dd | 198 | if (!node->view) |
6a4dceb1 | 199 | return NULL; |
1f5596dd CB |
200 | |
201 | node->cpu_count = cpu_count; | |
1f5596dd | 202 | |
6a4dceb1 CB |
203 | if (pthread_mutex_init(&node->lock, NULL)) |
204 | return NULL; | |
205 | /* | |
206 | * We're abusing the usage pointer to indicate that | |
207 | * pthread_mutex_init() was successful. Don't judge me. | |
208 | */ | |
209 | node->usage = move_ptr(new_usage); | |
1f5596dd | 210 | |
6a4dceb1 | 211 | return move_ptr(node); |
1f5596dd CB |
212 | } |
213 | ||
2d00d04c CB |
214 | static bool cgroup_supports(const char *controller, const char *cgroup, |
215 | const char *file) | |
4ec5c9da | 216 | { |
2c990b1d CB |
217 | __do_free char *path = NULL; |
218 | int cfd; | |
4ec5c9da CB |
219 | |
220 | cfd = get_cgroup_fd(controller); | |
221 | if (cfd < 0) | |
222 | return false; | |
223 | ||
925d5849 | 224 | path = must_make_path_relative(cgroup, file, NULL); |
2d00d04c | 225 | return faccessat(cfd, path, F_OK, 0) == 0; |
4ec5c9da CB |
226 | } |
227 | ||
54db3e71 | 228 | /* should be called with wr-locked list */ |
1f5596dd CB |
229 | static struct cg_proc_stat *prune_proc_stat_list(struct cg_proc_stat *node) |
230 | { | |
b456d40d | 231 | struct cg_proc_stat *first = NULL; |
1f5596dd | 232 | |
b456d40d | 233 | for (struct cg_proc_stat *prev = NULL; node; ) { |
2d00d04c | 234 | if (!cgroup_supports("cpu", node->cg, "cpu.shares")) { |
d5e34313 | 235 | struct cg_proc_stat *cur = node; |
1f5596dd | 236 | |
54db3e71 AM |
237 | /* |
238 | * We need to ensure that no one referenced this node, | |
239 | * because we are going to remove it from the list and free memory. | |
240 | * | |
241 | * If we can't grab the lock then just keep this node for now. | |
242 | */ | |
243 | if (pthread_mutex_trylock(&cur->lock)) | |
244 | goto next; | |
245 | ||
246 | /* | |
247 | * Yes, we can put lock back just after taking it, as we ensured | |
248 | * that we are only one user of it right now. | |
249 | * | |
250 | * It follows from three facts: | |
251 | * - we are under pthread_rwlock_wrlock(hash_table_bucket) | |
252 | * - pthread_mutex_lock is taken by find_proc_stat_node() | |
253 | * with pthread_rwlock_rdlock(hash_table_bucket) held. | |
254 | * - pthread_mutex_lock is taken by add_proc_stat_node() | |
255 | * with pthread_rwlock_wrlock(hash_table_bucket) held. | |
256 | * | |
257 | * It means that nobody can get a pointer to (cur) node in a parallel | |
258 | * thread and all old users of (cur) node have released pthread_mutex_lock(cur). | |
259 | */ | |
260 | pthread_mutex_unlock(&cur->lock); | |
261 | ||
1f5596dd CB |
262 | if (prev) |
263 | prev->next = node->next; | |
264 | else | |
265 | first = node->next; | |
266 | ||
267 | node = node->next; | |
d5e34313 CB |
268 | lxcfs_debug("Removing stat node for %s\n", cur); |
269 | ||
270 | free_proc_stat_node(cur); | |
1f5596dd | 271 | } else { |
54db3e71 | 272 | next: |
1f5596dd CB |
273 | if (!first) |
274 | first = node; | |
275 | prev = node; | |
276 | node = node->next; | |
277 | } | |
278 | } | |
279 | ||
280 | return first; | |
281 | } | |
282 | ||
283 | #define PROC_STAT_PRUNE_INTERVAL 10 | |
284 | static void prune_proc_stat_history(void) | |
285 | { | |
1f5596dd CB |
286 | time_t now = time(NULL); |
287 | ||
b456d40d | 288 | for (int i = 0; i < CPUVIEW_HASH_SIZE; i++) { |
1f5596dd CB |
289 | pthread_rwlock_wrlock(&proc_stat_history[i]->lock); |
290 | ||
291 | if ((proc_stat_history[i]->lastcheck + PROC_STAT_PRUNE_INTERVAL) > now) { | |
292 | pthread_rwlock_unlock(&proc_stat_history[i]->lock); | |
293 | return; | |
294 | } | |
295 | ||
296 | if (proc_stat_history[i]->next) { | |
297 | proc_stat_history[i]->next = prune_proc_stat_list(proc_stat_history[i]->next); | |
298 | proc_stat_history[i]->lastcheck = now; | |
299 | } | |
300 | ||
301 | pthread_rwlock_unlock(&proc_stat_history[i]->lock); | |
302 | } | |
303 | } | |
304 | ||
305 | static struct cg_proc_stat *find_proc_stat_node(struct cg_proc_stat_head *head, | |
306 | const char *cg) | |
307 | { | |
308 | struct cg_proc_stat *node; | |
309 | ||
310 | pthread_rwlock_rdlock(&head->lock); | |
311 | ||
312 | if (!head->next) { | |
313 | pthread_rwlock_unlock(&head->lock); | |
314 | return NULL; | |
315 | } | |
316 | ||
317 | node = head->next; | |
318 | ||
319 | do { | |
54db3e71 AM |
320 | if (strcmp(cg, node->cg) == 0) { |
321 | pthread_mutex_lock(&node->lock); | |
1f5596dd | 322 | goto out; |
54db3e71 | 323 | } |
1f5596dd CB |
324 | } while ((node = node->next)); |
325 | ||
326 | node = NULL; | |
327 | ||
328 | out: | |
329 | pthread_rwlock_unlock(&head->lock); | |
330 | prune_proc_stat_history(); | |
331 | return node; | |
332 | } | |
333 | ||
692f48eb CB |
334 | static struct cg_proc_stat *find_or_create_proc_stat_node(struct cpuacct_usage *usage, |
335 | int cpu_count, const char *cg) | |
1f5596dd CB |
336 | { |
337 | int hash = calc_hash(cg) % CPUVIEW_HASH_SIZE; | |
338 | struct cg_proc_stat_head *head = proc_stat_history[hash]; | |
339 | struct cg_proc_stat *node; | |
340 | ||
341 | node = find_proc_stat_node(head, cg); | |
1f5596dd CB |
342 | if (!node) { |
343 | node = new_proc_stat_node(usage, cpu_count, cg); | |
344 | if (!node) | |
345 | return NULL; | |
346 | ||
347 | node = add_proc_stat_node(node); | |
348 | lxcfs_debug("New stat node (%d) for %s\n", cpu_count, cg); | |
349 | } | |
350 | ||
ce089f10 CB |
351 | /* |
352 | * If additional CPUs on the host have been enabled, CPU usage counter | |
353 | * arrays have to be expanded. | |
354 | */ | |
1f5596dd CB |
355 | if (node->cpu_count < cpu_count) { |
356 | lxcfs_debug("Expanding stat node %d->%d for %s\n", | |
ce089f10 | 357 | node->cpu_count, cpu_count, cg); |
1f5596dd CB |
358 | |
359 | if (!expand_proc_stat_node(node, cpu_count)) { | |
360 | pthread_mutex_unlock(&node->lock); | |
b456d40d | 361 | return log_debug(NULL, "Unable to expand stat node %d->%d for %s", node->cpu_count, cpu_count, cg); |
1f5596dd CB |
362 | } |
363 | } | |
364 | ||
365 | return node; | |
366 | } | |
367 | ||
2b8eff1d CB |
368 | static void add_cpu_usage(uint64_t *surplus, struct cpuacct_usage *usage, |
369 | uint64_t *counter, uint64_t threshold) | |
1f5596dd | 370 | { |
1ba088ae | 371 | uint64_t free_space, to_add; |
1f5596dd CB |
372 | |
373 | free_space = threshold - usage->user - usage->system; | |
374 | ||
375 | if (free_space > usage->idle) | |
376 | free_space = usage->idle; | |
377 | ||
8206874a CB |
378 | if (free_space > *surplus) |
379 | to_add = *surplus; | |
380 | else | |
381 | to_add = free_space; | |
1f5596dd CB |
382 | |
383 | *counter += to_add; | |
384 | usage->idle -= to_add; | |
385 | *surplus -= to_add; | |
386 | } | |
387 | ||
1ba088ae CB |
388 | static uint64_t diff_cpu_usage(struct cpuacct_usage *older, |
389 | struct cpuacct_usage *newer, | |
390 | struct cpuacct_usage *diff, int cpu_count) | |
1f5596dd | 391 | { |
1ba088ae | 392 | uint64_t sum = 0; |
1f5596dd | 393 | |
b456d40d | 394 | for (int i = 0; i < cpu_count; i++) { |
1f5596dd CB |
395 | if (!newer[i].online) |
396 | continue; | |
397 | ||
b456d40d CB |
398 | /* |
399 | * When cpuset is changed on the fly, the CPUs might get | |
400 | * reordered. We could either reset all counters, or check | |
401 | * that the substractions below will return expected results. | |
1f5596dd CB |
402 | */ |
403 | if (newer[i].user > older[i].user) | |
404 | diff[i].user = newer[i].user - older[i].user; | |
405 | else | |
406 | diff[i].user = 0; | |
407 | ||
408 | if (newer[i].system > older[i].system) | |
409 | diff[i].system = newer[i].system - older[i].system; | |
410 | else | |
411 | diff[i].system = 0; | |
412 | ||
413 | if (newer[i].idle > older[i].idle) | |
414 | diff[i].idle = newer[i].idle - older[i].idle; | |
415 | else | |
416 | diff[i].idle = 0; | |
417 | ||
418 | sum += diff[i].user; | |
419 | sum += diff[i].system; | |
420 | sum += diff[i].idle; | |
421 | } | |
422 | ||
423 | return sum; | |
424 | } | |
425 | ||
426 | /* | |
b456d40d CB |
427 | * Read cgroup CPU quota parameters from `cpu.cfs_quota_us` or |
428 | * `cpu.cfs_period_us`, depending on `param`. Parameter value is returned | |
92264841 | 429 | * through `value`. |
1f5596dd CB |
430 | */ |
431 | static bool read_cpu_cfs_param(const char *cg, const char *param, int64_t *value) | |
432 | { | |
433 | __do_free char *str = NULL; | |
48f6862e | 434 | char file[STRLITERALLEN("cpu.cfs_period_us") + 1]; |
9844eea7 | 435 | bool first = true; |
48f6862e | 436 | int ret; |
1f5596dd | 437 | |
48f6862e | 438 | if (pure_unified_layout(cgroup_ops)) { |
9844eea7 | 439 | first = !strcmp(param, "quota"); |
48f6862e CB |
440 | ret = snprintf(file, sizeof(file), "cpu.max"); |
441 | } else { | |
442 | ret = snprintf(file, sizeof(file), "cpu.cfs_%s_us", param); | |
9844eea7 | 443 | } |
48f6862e | 444 | if (ret < 0 || (size_t)ret >= sizeof(file)) |
1f5596dd CB |
445 | return false; |
446 | ||
48f6862e | 447 | if (!cgroup_ops->get(cgroup_ops, "cpu", cg, file, &str)) |
1f5596dd CB |
448 | return false; |
449 | ||
48f6862e | 450 | return sscanf(str, first ? "%" PRId64 : "%*d %" PRId64, value) == 1; |
1f5596dd CB |
451 | } |
452 | ||
/*
 * Return the exact, possibly fractional, number of visible CPUs based on CPU
 * quotas: quota / period, capped at get_nprocs().
 * Zero is returned if either parameter cannot be read or is not positive.
 */
static double exact_cpu_count(const char *cg)
{
	int64_t cfs_quota, cfs_period;
	double cpus;
	int host_cpus;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota) ||
	    !read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	if (cfs_quota <= 0 || cfs_period <= 0)
		return 0;

	cpus = (double)cfs_quota / (double)cfs_period;
	host_cpus = get_nprocs();

	return cpus > host_cpus ? host_cpus : cpus;
}
481 | ||
/*
 * Return true if the cfs quota of the cgroup is negative or cannot be read.
 */
static bool cfs_quota_disabled(const char *cg)
{
	int64_t cfs_quota;
	bool readable = read_cpu_cfs_param(cg, "quota", &cfs_quota);

	/* cfs_quota is only inspected when the read succeeded. */
	return !readable || cfs_quota < 0;
}
494 | ||
1f5596dd CB |
495 | /* |
496 | * Return the maximum number of visible CPUs based on CPU quotas. | |
177d6ecf | 497 | * If there is no quota set, cpu number in cpuset value is returned. |
1f5596dd | 498 | */ |
4ec5c9da | 499 | int max_cpu_count(const char *cg) |
1f5596dd | 500 | { |
700dd417 | 501 | __do_free char *cpuset = NULL; |
1f5596dd CB |
502 | int rv, nprocs; |
503 | int64_t cfs_quota, cfs_period; | |
504 | int nr_cpus_in_cpuset = 0; | |
1f5596dd | 505 | |
921bdfdb | 506 | if (!read_cpu_cfs_param(cg, "quota", &cfs_quota)) |
177d6ecf | 507 | cfs_quota = 0; |
921bdfdb CB |
508 | |
509 | if (!read_cpu_cfs_param(cg, "period", &cfs_period)) | |
177d6ecf | 510 | cfs_period = 0; |
1f5596dd CB |
511 | |
512 | cpuset = get_cpuset(cg); | |
513 | if (cpuset) | |
514 | nr_cpus_in_cpuset = cpu_number_in_cpuset(cpuset); | |
515 | ||
921bdfdb | 516 | if (cfs_quota <= 0 || cfs_period <= 0) { |
1f5596dd CB |
517 | if (nr_cpus_in_cpuset > 0) |
518 | return nr_cpus_in_cpuset; | |
519 | ||
520 | return 0; | |
521 | } | |
522 | ||
523 | rv = cfs_quota / cfs_period; | |
524 | ||
921bdfdb CB |
525 | /* |
526 | * In case quota/period does not yield a whole number, add one CPU for | |
1f5596dd CB |
527 | * the remainder. |
528 | */ | |
529 | if ((cfs_quota % cfs_period) > 0) | |
530 | rv += 1; | |
531 | ||
532 | nprocs = get_nprocs(); | |
1f5596dd CB |
533 | if (rv > nprocs) |
534 | rv = nprocs; | |
535 | ||
921bdfdb | 536 | /* Use min value in cpu quota and cpuset. */ |
1f5596dd CB |
537 | if (nr_cpus_in_cpuset > 0 && nr_cpus_in_cpuset < rv) |
538 | rv = nr_cpus_in_cpuset; | |
539 | ||
540 | return rv; | |
541 | } | |
542 | ||
543 | int cpuview_proc_stat(const char *cg, const char *cpuset, | |
544 | struct cpuacct_usage *cg_cpu_usage, int cg_cpu_usage_size, | |
545 | FILE *f, char *buf, size_t buf_size) | |
546 | { | |
547 | __do_free char *line = NULL; | |
548 | __do_free struct cpuacct_usage *diff = NULL; | |
4f18a602 | 549 | size_t linelen = 0, total_len = 0; |
1f5596dd CB |
550 | int curcpu = -1; /* cpu numbering starts at 0 */ |
551 | int physcpu, i; | |
39f231da | 552 | int cpu_cnt = 0; |
2b8eff1d CB |
553 | uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, |
554 | softirq = 0, steal = 0, guest = 0, guest_nice = 0; | |
555 | uint64_t user_sum = 0, system_sum = 0, idle_sum = 0; | |
556 | uint64_t user_surplus = 0, system_surplus = 0; | |
39f231da | 557 | int nprocs, max_cpus; |
4f18a602 | 558 | ssize_t l; |
2b8eff1d | 559 | uint64_t total_sum, threshold; |
1f5596dd | 560 | struct cg_proc_stat *stat_node; |
1f5596dd | 561 | |
39f231da | 562 | nprocs = get_nprocs_conf(); |
1f5596dd CB |
563 | if (cg_cpu_usage_size < nprocs) |
564 | nprocs = cg_cpu_usage_size; | |
565 | ||
566 | /* Read all CPU stats and stop when we've encountered other lines */ | |
567 | while (getline(&line, &linelen, f) != -1) { | |
568 | int ret; | |
569 | char cpu_char[10]; /* That's a lot of cores */ | |
570 | uint64_t all_used, cg_used; | |
571 | ||
572 | if (strlen(line) == 0) | |
573 | continue; | |
574 | ||
575 | /* not a ^cpuN line containing a number N */ | |
576 | if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) | |
577 | break; | |
578 | ||
579 | if (sscanf(cpu_char, "%d", &physcpu) != 1) | |
580 | continue; | |
581 | ||
582 | if (physcpu >= cg_cpu_usage_size) | |
583 | continue; | |
584 | ||
fd65c77c CB |
585 | curcpu++; |
586 | cpu_cnt++; | |
1f5596dd CB |
587 | |
588 | if (!cpu_in_cpuset(physcpu, cpuset)) { | |
589 | for (i = curcpu; i <= physcpu; i++) | |
590 | cg_cpu_usage[i].online = false; | |
591 | continue; | |
592 | } | |
593 | ||
594 | if (curcpu < physcpu) { | |
595 | /* Some CPUs may be disabled */ | |
596 | for (i = curcpu; i < physcpu; i++) | |
597 | cg_cpu_usage[i].online = false; | |
598 | ||
599 | curcpu = physcpu; | |
600 | } | |
601 | ||
602 | cg_cpu_usage[curcpu].online = true; | |
603 | ||
2b8eff1d | 604 | ret = sscanf(line, "%*s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 "lu", |
1f5596dd CB |
605 | &user, |
606 | &nice, | |
607 | &system, | |
608 | &idle, | |
609 | &iowait, | |
610 | &irq, | |
611 | &softirq, | |
612 | &steal, | |
613 | &guest, | |
614 | &guest_nice); | |
1f5596dd CB |
615 | if (ret != 10) |
616 | continue; | |
617 | ||
618 | all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice; | |
619 | cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system; | |
620 | ||
621 | if (all_used >= cg_used) { | |
622 | cg_cpu_usage[curcpu].idle = idle + (all_used - cg_used); | |
1f5596dd | 623 | } else { |
cc49667a CB |
624 | lxcfs_v("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time", |
625 | curcpu, cg, all_used, cg_used); | |
1f5596dd CB |
626 | cg_cpu_usage[curcpu].idle = idle; |
627 | } | |
628 | } | |
629 | ||
f9434b9a CB |
630 | /* Cannot use more CPUs than is available in cpuset. */ |
631 | max_cpus = max_cpu_count(cg); | |
632 | if (max_cpus > cpu_cnt || !max_cpus) | |
633 | max_cpus = cpu_cnt; | |
634 | ||
692f48eb | 635 | /* takes lock pthread_mutex_lock(&node->lock) */ |
1f5596dd | 636 | stat_node = find_or_create_proc_stat_node(cg_cpu_usage, nprocs, cg); |
b456d40d CB |
637 | if (!stat_node) |
638 | return log_error(0, "Failed to find/create stat node for %s", cg); | |
1f5596dd | 639 | |
b4572722 | 640 | diff = zalloc(sizeof(struct cpuacct_usage) * nprocs); |
700dd417 | 641 | if (!diff) |
08d61303 | 642 | goto out_pthread_mutex_unlock; |
1f5596dd CB |
643 | |
644 | /* | |
645 | * If the new values are LOWER than values stored in memory, it means | |
646 | * the cgroup has been reset/recreated and we should reset too. | |
647 | */ | |
648 | for (curcpu = 0; curcpu < nprocs; curcpu++) { | |
649 | if (!cg_cpu_usage[curcpu].online) | |
650 | continue; | |
651 | ||
652 | if (cg_cpu_usage[curcpu].user < stat_node->usage[curcpu].user) | |
653 | reset_proc_stat_node(stat_node, cg_cpu_usage, nprocs); | |
654 | ||
655 | break; | |
656 | } | |
657 | ||
658 | total_sum = diff_cpu_usage(stat_node->usage, cg_cpu_usage, diff, nprocs); | |
659 | ||
660 | for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) { | |
661 | stat_node->usage[curcpu].online = cg_cpu_usage[curcpu].online; | |
662 | ||
663 | if (!stat_node->usage[curcpu].online) | |
664 | continue; | |
665 | ||
666 | i++; | |
667 | ||
b4572722 | 668 | stat_node->usage[curcpu].user += diff[curcpu].user; |
1f5596dd | 669 | stat_node->usage[curcpu].system += diff[curcpu].system; |
b4572722 | 670 | stat_node->usage[curcpu].idle += diff[curcpu].idle; |
1f5596dd CB |
671 | |
672 | if (max_cpus > 0 && i >= max_cpus) { | |
b4572722 CB |
673 | user_surplus += diff[curcpu].user; |
674 | system_surplus += diff[curcpu].system; | |
1f5596dd CB |
675 | } |
676 | } | |
677 | ||
678 | /* Calculate usage counters of visible CPUs */ | |
679 | if (max_cpus > 0) { | |
2b8eff1d CB |
680 | uint64_t diff_user = 0; |
681 | uint64_t diff_system = 0; | |
682 | uint64_t diff_idle = 0; | |
683 | uint64_t max_diff_idle = 0; | |
684 | uint64_t max_diff_idle_index = 0; | |
1f5596dd | 685 | double exact_cpus; |
1f5596dd CB |
686 | /* threshold = maximum usage per cpu, including idle */ |
687 | threshold = total_sum / cpu_cnt * max_cpus; | |
688 | ||
689 | for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) { | |
690 | if (!stat_node->usage[curcpu].online) | |
691 | continue; | |
692 | ||
693 | i++; | |
694 | ||
695 | if (i == max_cpus) | |
696 | break; | |
697 | ||
698 | if (diff[curcpu].user + diff[curcpu].system >= threshold) | |
699 | continue; | |
700 | ||
701 | /* Add user */ | |
702 | add_cpu_usage(&user_surplus, &diff[curcpu], | |
703 | &diff[curcpu].user, threshold); | |
704 | ||
705 | if (diff[curcpu].user + diff[curcpu].system >= threshold) | |
706 | continue; | |
707 | ||
708 | /* If there is still room, add system */ | |
709 | add_cpu_usage(&system_surplus, &diff[curcpu], | |
710 | &diff[curcpu].system, threshold); | |
711 | } | |
712 | ||
713 | if (user_surplus > 0) | |
1ea6aaf2 | 714 | lxcfs_debug("leftover user: %" PRIu64 "for %s\n", user_surplus, cg); |
1f5596dd | 715 | if (system_surplus > 0) |
1ea6aaf2 | 716 | lxcfs_debug("leftover system: %" PRIu64 "for %s\n", system_surplus, cg); |
1f5596dd CB |
717 | |
718 | for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) { | |
719 | if (!stat_node->usage[curcpu].online) | |
720 | continue; | |
721 | ||
722 | i++; | |
723 | ||
724 | if (i == max_cpus) | |
725 | break; | |
726 | ||
b4572722 CB |
727 | stat_node->view[curcpu].user += diff[curcpu].user; |
728 | stat_node->view[curcpu].system += diff[curcpu].system; | |
729 | stat_node->view[curcpu].idle += diff[curcpu].idle; | |
1f5596dd | 730 | |
b4572722 CB |
731 | diff_user += diff[curcpu].user; |
732 | diff_system += diff[curcpu].system; | |
733 | diff_idle += diff[curcpu].idle; | |
1f5596dd | 734 | if (diff[curcpu].idle > max_diff_idle) { |
b4572722 CB |
735 | max_diff_idle = diff[curcpu].idle; |
736 | max_diff_idle_index = curcpu; | |
1f5596dd CB |
737 | } |
738 | ||
1ea6aaf2 | 739 | lxcfs_v("curcpu: %d, diff_user: %" PRIu64 ", diff_system: %" PRIu64 ", diff_idle: %" PRIu64 "\n", curcpu, diff[curcpu].user, diff[curcpu].system, diff[curcpu].idle); |
1f5596dd | 740 | } |
1ea6aaf2 | 741 | lxcfs_v("total. diff_user: %" PRIu64 ", diff_system: %" PRIu64 ", diff_idle: %" PRIu64 "\n", diff_user, diff_system, diff_idle); |
1f5596dd | 742 | |
2b37a10e NPH |
743 | for (curcpu = 0; curcpu < nprocs; curcpu++) { |
744 | user_sum += stat_node->view[curcpu].user; | |
745 | system_sum += stat_node->view[curcpu].system; | |
746 | idle_sum += stat_node->view[curcpu].idle; | |
747 | } | |
748 | ||
1f5596dd CB |
749 | /* revise cpu usage view to support partial cpu case. */ |
750 | exact_cpus = exact_cpu_count(cg); | |
abc4d399 NPH |
751 | |
752 | /* skip revise cpu when cfs quota is disabled (exact_cpus == 0) */ | |
753 | if (!cfs_quota_disabled(cg) && exact_cpus < (double)max_cpus){ | |
1ba088ae | 754 | uint64_t delta = (uint64_t)((double)(diff_user + diff_system + diff_idle) * (1 - exact_cpus / (double)max_cpus)); |
1f5596dd CB |
755 | |
756 | lxcfs_v("revising cpu usage view to match the exact cpu count [%f]\n", exact_cpus); | |
1ea6aaf2 CB |
757 | lxcfs_v("delta: %" PRIu64 "\n", delta); |
758 | lxcfs_v("idle_sum before: %" PRIu64 "\n", idle_sum); | |
b4572722 CB |
759 | if (idle_sum > delta) |
760 | idle_sum = idle_sum - delta; | |
761 | else | |
762 | idle_sum = 0; | |
1ea6aaf2 | 763 | lxcfs_v("idle_sum after: %l" PRIu64 "\n", idle_sum); |
1f5596dd CB |
764 | |
765 | curcpu = max_diff_idle_index; | |
1ea6aaf2 | 766 | lxcfs_v("curcpu: %d, idle before: %" PRIu64 "\n", curcpu, stat_node->view[curcpu].idle); |
b4572722 CB |
767 | if (stat_node->view[curcpu].idle > delta) |
768 | stat_node->view[curcpu].idle = stat_node->view[curcpu].idle - delta; | |
769 | else | |
770 | stat_node->view[curcpu].idle = 0; | |
1ea6aaf2 | 771 | lxcfs_v("curcpu: %d, idle after: %" PRIu64 "\n", curcpu, stat_node->view[curcpu].idle); |
1f5596dd CB |
772 | } |
773 | } else { | |
774 | for (curcpu = 0; curcpu < nprocs; curcpu++) { | |
775 | if (!stat_node->usage[curcpu].online) | |
776 | continue; | |
777 | ||
b4572722 CB |
778 | stat_node->view[curcpu].user = stat_node->usage[curcpu].user; |
779 | stat_node->view[curcpu].system = stat_node->usage[curcpu].system; | |
780 | stat_node->view[curcpu].idle = stat_node->usage[curcpu].idle; | |
1f5596dd | 781 | |
b4572722 CB |
782 | user_sum += stat_node->view[curcpu].user; |
783 | system_sum += stat_node->view[curcpu].system; | |
784 | idle_sum += stat_node->view[curcpu].idle; | |
1f5596dd CB |
785 | } |
786 | } | |
787 | ||
788 | /* Render the file */ | |
789 | /* cpu-all */ | |
2b8eff1d CB |
790 | l = snprintf(buf, buf_size, |
791 | "cpu %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n", | |
792 | user_sum, system_sum, idle_sum); | |
1f5596dd | 793 | lxcfs_v("cpu-all: %s\n", buf); |
692f48eb CB |
794 | if (l < 0) { |
795 | lxcfs_error("Failed to write cache"); | |
796 | total_len = 0; | |
797 | goto out_pthread_mutex_unlock; | |
798 | } | |
3cf1e562 | 799 | if ((size_t)l >= buf_size) { |
08d61303 CB |
800 | lxcfs_error("Write to cache was truncated"); |
801 | total_len = 0; | |
802 | goto out_pthread_mutex_unlock; | |
803 | } | |
1f5596dd CB |
804 | |
805 | buf += l; | |
806 | buf_size -= l; | |
807 | total_len += l; | |
808 | ||
2b37a10e NPH |
809 | /* Render visible CPUs |
810 | Assume there are K CPUs: 0, 1, 2, ..., K-1. | |
811 | Among them, there are M online CPUs with index: a1, a2, ... aN ... aM (M >= N) | |
812 | N = max_cpus, M = number of online CPUs | |
813 | ||
814 | There will be N rendered cpus, indexed from 0 to N-1, cpu times of the cpus are calculated from those formula: | |
815 | - user_time[0] = stat_node->view[0].user + stat_node->view[1].user + ... + stat_node->view[a1].user | |
816 | - user_time[1] = stat_node->view[a1+1].user + stat_node->view[a1+1].user + ... + stat_node->view[a2].user | |
817 | ... | |
818 | - user_time[N-2] = stat_node->view[a(N-2)+1].user + stat_node->view[a(N-2)+2].user + ... | |
819 | + stat_node->view[a(N-1)].user | |
820 | - user_time[N-1] = stat_node->view[a(N-1)+1].user + stat_node->view[a(N-1)+2].user + ... | |
821 | + stat_node->view[aN] + ... + stat_node->view[K-1] (sum of all remaining CPUs) | |
822 | ||
823 | Similar formula applied for system and idle time | |
824 | */ | |
825 | ||
826 | uint64_t curcpu_view_user_sum = 0, curcpu_view_system_sum = 0, curcpu_view_idle_sum = 0; | |
1f5596dd | 827 | for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) { |
2b37a10e NPH |
828 | curcpu_view_user_sum += stat_node->view[curcpu].user; |
829 | curcpu_view_system_sum += stat_node->view[curcpu].system; | |
830 | curcpu_view_idle_sum += stat_node->view[curcpu].idle; | |
1f5596dd | 831 | |
2b37a10e NPH |
832 | if (!stat_node->usage[curcpu].online && curcpu < nprocs - 1) { |
833 | continue; | |
834 | } | |
835 | ||
1f5596dd CB |
836 | i++; |
837 | ||
2b37a10e NPH |
838 | if (max_cpus > 0 && i >= max_cpus) { |
839 | // max(i) = count(rendered cpus) = max_cpus - 1 | |
840 | i--; | |
841 | } | |
842 | ||
843 | if (max_cpus > 0 && i == max_cpus - 1 && curcpu < nprocs - 1) { | |
844 | // last 'rendered' cpu, sum until reaches the last cpu | |
845 | continue; | |
846 | } | |
1f5596dd | 847 | |
2b8eff1d CB |
848 | l = snprintf(buf, buf_size, "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n", |
849 | i, | |
2b37a10e NPH |
850 | curcpu_view_user_sum, |
851 | curcpu_view_system_sum, | |
852 | curcpu_view_idle_sum); | |
1f5596dd | 853 | lxcfs_v("cpu: %s\n", buf); |
692f48eb CB |
854 | if (l < 0) { |
855 | lxcfs_error("Failed to write cache"); | |
856 | total_len = 0; | |
857 | goto out_pthread_mutex_unlock; | |
858 | } | |
3cf1e562 | 859 | if ((size_t)l >= buf_size) { |
692f48eb CB |
860 | lxcfs_error("Write to cache was truncated"); |
861 | total_len = 0; | |
862 | goto out_pthread_mutex_unlock; | |
863 | } | |
1f5596dd CB |
864 | |
865 | buf += l; | |
866 | buf_size -= l; | |
867 | total_len += l; | |
2b37a10e NPH |
868 | |
869 | curcpu_view_user_sum = 0; | |
870 | curcpu_view_system_sum = 0; | |
871 | curcpu_view_idle_sum = 0; | |
1f5596dd CB |
872 | } |
873 | ||
874 | /* Pass the rest of /proc/stat, start with the last line read */ | |
875 | l = snprintf(buf, buf_size, "%s", line); | |
692f48eb CB |
876 | if (l < 0) { |
877 | lxcfs_error("Failed to write cache"); | |
878 | total_len = 0; | |
879 | goto out_pthread_mutex_unlock; | |
880 | } | |
3cf1e562 | 881 | if ((size_t)l >= buf_size) { |
692f48eb CB |
882 | lxcfs_error("Write to cache was truncated"); |
883 | total_len = 0; | |
884 | goto out_pthread_mutex_unlock; | |
885 | } | |
1f5596dd CB |
886 | |
887 | buf += l; | |
888 | buf_size -= l; | |
889 | total_len += l; | |
890 | ||
891 | /* Pass the rest of the host's /proc/stat */ | |
892 | while (getline(&line, &linelen, f) != -1) { | |
893 | l = snprintf(buf, buf_size, "%s", line); | |
692f48eb CB |
894 | if (l < 0) { |
895 | lxcfs_error("Failed to write cache"); | |
896 | total_len = 0; | |
897 | goto out_pthread_mutex_unlock; | |
898 | } | |
3cf1e562 | 899 | if ((size_t)l >= buf_size) { |
692f48eb CB |
900 | lxcfs_error("Write to cache was truncated"); |
901 | total_len = 0; | |
902 | goto out_pthread_mutex_unlock; | |
903 | } | |
b456d40d | 904 | |
1f5596dd CB |
905 | buf += l; |
906 | buf_size -= l; | |
907 | total_len += l; | |
908 | } | |
909 | ||
692f48eb | 910 | out_pthread_mutex_unlock: |
1f5596dd CB |
911 | if (stat_node) |
912 | pthread_mutex_unlock(&stat_node->lock); | |
b456d40d | 913 | |
1f5596dd CB |
914 | return total_len; |
915 | } | |
916 | ||
/*
 * Check whether this is a "^processor" line in /proc/cpuinfo, i.e. a line
 * that starts with "processor", a colon and a decimal cpu number.
 */
static inline bool is_processor_line(const char *line)
{
	int cpu_id;
	int matched = sscanf(line, "processor : %d", &cpu_id);

	/* Exactly one conversion means the cpu number was parsed. */
	return matched == 1;
}
925 | ||
/*
 * For a "processor : <n>" line, report whether cpu <n> belongs to @cpuset as
 * decided by cpu_in_cpuset(). Lines that do not parse as a processor line
 * yield false.
 */
static inline bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu_id;

	if (sscanf(line, "processor : %d", &cpu_id) != 1)
		return false;

	return cpu_in_cpuset(cpu_id, cpuset);
}
935 | ||
936 | int proc_cpuinfo_read(char *buf, size_t size, off_t offset, | |
937 | struct fuse_file_info *fi) | |
938 | { | |
939 | __do_free char *cg = NULL, *cpuset = NULL, *line = NULL; | |
757a63e7 | 940 | __do_free void *fopen_cache = NULL; |
1f5596dd CB |
941 | __do_fclose FILE *f = NULL; |
942 | struct fuse_context *fc = fuse_get_context(); | |
0274438c | 943 | struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data; |
99b183fb | 944 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
1f5596dd CB |
945 | size_t linelen = 0, total_len = 0; |
946 | bool am_printing = false, firstline = true, is_s390x = false; | |
947 | int curcpu = -1, cpu, max_cpus = 0; | |
948 | bool use_view; | |
949 | char *cache = d->buf; | |
950 | size_t cache_size = d->buflen; | |
951 | ||
f9434b9a | 952 | if (offset) { |
3cf1e562 | 953 | size_t left; |
1f5596dd CB |
954 | |
955 | if (offset > d->size) | |
956 | return -EINVAL; | |
957 | ||
958 | if (!d->cached) | |
959 | return 0; | |
960 | ||
961 | left = d->size - offset; | |
962 | total_len = left > size ? size: left; | |
963 | memcpy(buf, cache + offset, total_len); | |
964 | ||
965 | return total_len; | |
966 | } | |
967 | ||
968 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
969 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
970 | initpid = fc->pid; | |
b456d40d | 971 | |
1f5596dd CB |
972 | cg = get_pid_cgroup(initpid, "cpuset"); |
973 | if (!cg) | |
974 | return read_file_fuse("proc/cpuinfo", buf, size, d); | |
975 | prune_init_slice(cg); | |
976 | ||
977 | cpuset = get_cpuset(cg); | |
978 | if (!cpuset) | |
979 | return 0; | |
980 | ||
8044f626 | 981 | if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts && opts->use_cfs) |
0274438c | 982 | use_view = true; |
8044f626 CB |
983 | else |
984 | use_view = false; | |
1f5596dd CB |
985 | if (use_view) |
986 | max_cpus = max_cpu_count(cg); | |
987 | ||
757a63e7 | 988 | f = fopen_cached("/proc/cpuinfo", "re", &fopen_cache); |
1f5596dd CB |
989 | if (!f) |
990 | return 0; | |
991 | ||
992 | while (getline(&line, &linelen, f) != -1) { | |
993 | ssize_t l; | |
994 | if (firstline) { | |
995 | firstline = false; | |
996 | if (strstr(line, "IBM/S390") != NULL) { | |
997 | is_s390x = true; | |
998 | am_printing = true; | |
999 | continue; | |
1000 | } | |
1001 | } | |
b456d40d | 1002 | |
1f5596dd CB |
1003 | if (strncmp(line, "# processors:", 12) == 0) |
1004 | continue; | |
b456d40d | 1005 | |
1f5596dd | 1006 | if (is_processor_line(line)) { |
d0031abf | 1007 | if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus) |
1f5596dd | 1008 | break; |
b456d40d | 1009 | |
1f5596dd CB |
1010 | am_printing = cpuline_in_cpuset(line, cpuset); |
1011 | if (am_printing) { | |
d0031abf | 1012 | curcpu++; |
1f5596dd | 1013 | l = snprintf(cache, cache_size, "processor : %d\n", curcpu); |
b456d40d CB |
1014 | if (l < 0) |
1015 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 1016 | if ((size_t)l >= cache_size) |
b456d40d | 1017 | return log_error(0, "Write to cache was truncated"); |
1f5596dd CB |
1018 | cache += l; |
1019 | cache_size -= l; | |
1020 | total_len += l; | |
1021 | } | |
1022 | continue; | |
1023 | } else if (is_s390x && sscanf(line, "processor %d:", &cpu) == 1) { | |
1024 | char *p; | |
b456d40d | 1025 | |
d0031abf | 1026 | if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus) |
1f5596dd | 1027 | break; |
b456d40d | 1028 | |
1f5596dd CB |
1029 | if (!cpu_in_cpuset(cpu, cpuset)) |
1030 | continue; | |
b456d40d | 1031 | |
1f5596dd CB |
1032 | curcpu ++; |
1033 | p = strchr(line, ':'); | |
1034 | if (!p || !*p) | |
1035 | return 0; | |
1036 | p++; | |
b456d40d | 1037 | |
1f5596dd | 1038 | l = snprintf(cache, cache_size, "processor %d:%s", curcpu, p); |
b456d40d CB |
1039 | if (l < 0) |
1040 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 1041 | if ((size_t)l >= cache_size) |
b456d40d CB |
1042 | return log_error(0, "Write to cache was truncated"); |
1043 | ||
1f5596dd CB |
1044 | cache += l; |
1045 | cache_size -= l; | |
1046 | total_len += l; | |
1047 | continue; | |
1048 | ||
1049 | } | |
1050 | if (am_printing) { | |
1051 | l = snprintf(cache, cache_size, "%s", line); | |
b456d40d CB |
1052 | if (l < 0) |
1053 | return log_error(0, "Failed to write cache"); | |
3cf1e562 | 1054 | if ((size_t)l >= cache_size) |
b456d40d CB |
1055 | return log_error(0, "Write to cache was truncated"); |
1056 | ||
1f5596dd CB |
1057 | cache += l; |
1058 | cache_size -= l; | |
1059 | total_len += l; | |
1060 | } | |
1061 | } | |
1062 | ||
1063 | if (is_s390x) { | |
1064 | __do_free char *origcache = d->buf; | |
1065 | ssize_t l; | |
1066 | ||
1067 | d->buf = malloc(d->buflen); | |
1068 | if (!d->buf) { | |
1069 | d->buf = move_ptr(origcache); | |
1070 | return 0; | |
1071 | } | |
1072 | ||
1073 | cache = d->buf; | |
1074 | cache_size = d->buflen; | |
1075 | total_len = 0; | |
1076 | l = snprintf(cache, cache_size, "vendor_id : IBM/S390\n"); | |
3cf1e562 | 1077 | if (l < 0 || (size_t)l >= cache_size) |
1f5596dd CB |
1078 | return 0; |
1079 | ||
1080 | cache_size -= l; | |
1081 | cache += l; | |
1082 | total_len += l; | |
1083 | l = snprintf(cache, cache_size, "# processors : %d\n", curcpu + 1); | |
3cf1e562 | 1084 | if (l < 0 || (size_t)l >= cache_size) |
1f5596dd CB |
1085 | return 0; |
1086 | ||
1087 | cache_size -= l; | |
1088 | cache += l; | |
1089 | total_len += l; | |
1090 | l = snprintf(cache, cache_size, "%s", origcache); | |
3cf1e562 | 1091 | if (l < 0 || (size_t)l >= cache_size) |
1f5596dd CB |
1092 | return 0; |
1093 | total_len += l; | |
1094 | } | |
1095 | ||
1096 | d->cached = 1; | |
1097 | d->size = total_len; | |
d0031abf CB |
1098 | if (total_len > size) |
1099 | total_len = size; | |
1f5596dd CB |
1100 | |
1101 | /* read from off 0 */ | |
1102 | memcpy(buf, d->buf, total_len); | |
d0031abf | 1103 | |
1f5596dd CB |
1104 | return total_len; |
1105 | } | |
1106 | ||
1107 | /* | |
1108 | * Returns 0 on success. | |
1109 | * It is the caller's responsibility to free `return_usage`, unless this | |
1110 | * function returns an error. | |
1111 | */ | |
1112 | int read_cpuacct_usage_all(char *cg, char *cpuset, | |
1113 | struct cpuacct_usage **return_usage, int *size) | |
1114 | { | |
1115 | __do_free char *usage_str = NULL; | |
1116 | __do_free struct cpuacct_usage *cpu_usage = NULL; | |
9ce186dc | 1117 | int i = 0, j = 0, read_pos = 0, read_cnt = 0; |
8b6987a2 | 1118 | int cpucount; |
9ce186dc | 1119 | int ret; |
1f5596dd CB |
1120 | int cg_cpu; |
1121 | uint64_t cg_user, cg_system; | |
1122 | int64_t ticks_per_sec; | |
1123 | ||
1124 | ticks_per_sec = sysconf(_SC_CLK_TCK); | |
1f5596dd | 1125 | if (ticks_per_sec < 0 && errno == EINVAL) { |
8b6987a2 | 1126 | lxcfs_debug("%m - Failed to determine number of ticks per second"); |
1f5596dd CB |
1127 | return -1; |
1128 | } | |
1129 | ||
f9434b9a | 1130 | cpucount = get_nprocs_conf(); |
1f5596dd CB |
1131 | cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount); |
1132 | if (!cpu_usage) | |
1133 | return -ENOMEM; | |
1134 | ||
1135 | memset(cpu_usage, 0, sizeof(struct cpuacct_usage) * cpucount); | |
1136 | if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_all", &usage_str)) { | |
8b6987a2 CB |
1137 | char *sep = " \t\n"; |
1138 | char *tok; | |
1f5596dd | 1139 | |
8b6987a2 CB |
1140 | /* Read cpuacct.usage_percpu instead. */ |
1141 | lxcfs_debug("Falling back to cpuacct.usage_percpu"); | |
1f5596dd CB |
1142 | if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_percpu", &usage_str)) |
1143 | return -1; | |
1f5596dd | 1144 | |
8b6987a2 CB |
1145 | lxc_iterate_parts(tok, usage_str, sep) { |
1146 | uint64_t percpu_user; | |
1147 | ||
1148 | if (i >= cpucount) | |
1149 | break; | |
1f5596dd | 1150 | |
8b6987a2 CB |
1151 | tok = trim_whitespace_in_place(tok); |
1152 | ret = safe_uint64(tok, &percpu_user, 10); | |
1153 | if (ret) | |
1154 | return -1; | |
1f5596dd | 1155 | |
8b6987a2 CB |
1156 | /* Convert the time from nanoseconds to USER_HZ */ |
1157 | cpu_usage[i].user = percpu_user / 1000.0 / 1000 / 1000 * ticks_per_sec; | |
1158 | cpu_usage[i].system = cpu_usage[i].user; | |
1f5596dd | 1159 | i++; |
8b6987a2 | 1160 | lxcfs_debug("cpu%d with time %s", i, tok); |
1f5596dd | 1161 | } |
8b6987a2 CB |
1162 | } else { |
1163 | if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) | |
1164 | return log_error(-1, "read_cpuacct_usage_all reading first line from %s/cpuacct.usage_all failed", cg); | |
1f5596dd | 1165 | |
8b6987a2 | 1166 | read_pos += read_cnt; |
1f5596dd | 1167 | |
8b6987a2 CB |
1168 | for (i = 0, j = 0; i < cpucount; i++) { |
1169 | ret = sscanf(usage_str + read_pos, | |
1170 | "%d %" PRIu64 " %" PRIu64 "\n%n", &cg_cpu, | |
1171 | &cg_user, &cg_system, &read_cnt); | |
1f5596dd | 1172 | |
8b6987a2 CB |
1173 | if (ret == EOF) |
1174 | break; | |
1f5596dd | 1175 | |
8b6987a2 CB |
1176 | if (ret != 3) |
1177 | return log_error(-EINVAL, "Failed to parse cpuacct.usage_all line %s from cgroup %s", | |
1178 | usage_str + read_pos, cg); | |
1f5596dd | 1179 | |
8b6987a2 | 1180 | read_pos += read_cnt; |
1f5596dd | 1181 | |
8b6987a2 CB |
1182 | /* Convert the time from nanoseconds to USER_HZ */ |
1183 | cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec; | |
1184 | cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec; | |
1185 | j++; | |
1186 | } | |
1f5596dd CB |
1187 | } |
1188 | ||
1189 | *return_usage = move_ptr(cpu_usage); | |
1190 | *size = cpucount; | |
1191 | return 0; | |
1192 | } | |
1193 | ||
1194 | static bool cpuview_init_head(struct cg_proc_stat_head **head) | |
1195 | { | |
9d7fc1a3 | 1196 | __do_free struct cg_proc_stat_head *h; |
1f5596dd | 1197 | |
9d7fc1a3 CB |
1198 | h = zalloc(sizeof(struct cg_proc_stat_head)); |
1199 | if (!h) | |
1200 | return false; | |
1f5596dd | 1201 | |
9d7fc1a3 CB |
1202 | if (pthread_rwlock_init(&h->lock, NULL)) |
1203 | return false; | |
1204 | ||
1205 | h->lastcheck = time(NULL); | |
1f5596dd | 1206 | |
9d7fc1a3 | 1207 | *head = move_ptr(h); |
1f5596dd CB |
1208 | return true; |
1209 | } | |
1210 | ||
4ec5c9da | 1211 | bool init_cpuview(void) |
1f5596dd CB |
1212 | { |
1213 | int i; | |
1214 | ||
1215 | for (i = 0; i < CPUVIEW_HASH_SIZE; i++) | |
1216 | proc_stat_history[i] = NULL; | |
1217 | ||
1218 | for (i = 0; i < CPUVIEW_HASH_SIZE; i++) { | |
1219 | if (!cpuview_init_head(&proc_stat_history[i])) | |
1220 | goto err; | |
1221 | } | |
1222 | ||
1223 | return true; | |
1224 | ||
1225 | err: | |
1226 | for (i = 0; i < CPUVIEW_HASH_SIZE; i++) { | |
1227 | if (proc_stat_history[i]) | |
1228 | free_disarm(proc_stat_history[i]); | |
1229 | } | |
1230 | ||
1231 | return false; | |
1232 | } | |
1233 | ||
1f5596dd CB |
1234 | static void cpuview_free_head(struct cg_proc_stat_head *head) |
1235 | { | |
905769cd | 1236 | struct cg_proc_stat *node; |
1f5596dd CB |
1237 | |
1238 | if (head->next) { | |
1239 | node = head->next; | |
1240 | ||
1241 | for (;;) { | |
905769cd | 1242 | struct cg_proc_stat *cur = node; |
1f5596dd | 1243 | node = node->next; |
905769cd | 1244 | free_proc_stat_node(cur); |
1f5596dd CB |
1245 | if (!node) |
1246 | break; | |
1247 | } | |
1248 | } | |
1249 | ||
1250 | pthread_rwlock_destroy(&head->lock); | |
1251 | free_disarm(head); | |
1252 | } | |
1253 | ||
4ec5c9da | 1254 | void free_cpuview(void) |
1f5596dd | 1255 | { |
4ec5c9da | 1256 | for (int i = 0; i < CPUVIEW_HASH_SIZE; i++) |
1f5596dd CB |
1257 | if (proc_stat_history[i]) |
1258 | cpuview_free_head(proc_stat_history[i]); | |
1f5596dd | 1259 | } |