/* SPDX-License-Identifier: LGPL-2.1-or-later */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#ifndef FUSE_USE_VERSION
#define FUSE_USE_VERSION 26
#endif

#define _FILE_OFFSET_BITS 64

#define __STDC_FORMAT_MACROS
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <fuse.h>
#include <inttypes.h>
#include <libgen.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <wait.h>
#include <linux/magic.h>
#include <linux/sched.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <sys/vfs.h>

#include "bindings.h"
#include "config.h"
#include "cgroup_fuse.h"
#include "cpuset_parse.h"
#include "cgroups/cgroup.h"
#include "cgroups/cgroup_utils.h"
#include "memory_utils.h"
#include "proc_loadavg.h"
#include "utils.h"

/* Data for CPU view */
struct cg_proc_stat {
	char *cg;
	struct cpuacct_usage *usage;	// Real usage as read from the host's /proc/stat
	struct cpuacct_usage *view;	// Usage stats reported to the container
	int cpu_count;
	pthread_mutex_t lock;		// For node manipulation
	struct cg_proc_stat *next;
};

struct cg_proc_stat_head {
	struct cg_proc_stat *next;
	time_t lastcheck;

	/*
	 * For access to the list. Reading can be parallel, pruning is exclusive.
	 */
	pthread_rwlock_t lock;
};

#define CPUVIEW_HASH_SIZE 100
static struct cg_proc_stat_head *proc_stat_history[CPUVIEW_HASH_SIZE];
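
/*
 * Rough sketch of how the history table is used (see add_proc_stat_node()
 * and find_proc_stat_node() below): a cgroup name is hashed into one of the
 * CPUVIEW_HASH_SIZE buckets and collisions are chained through each node's
 * `next` pointer:
 *
 *	int hash = calc_hash(cg) % CPUVIEW_HASH_SIZE;
 *	struct cg_proc_stat_head *head = proc_stat_history[hash];
 *	// walk head->next under head->lock, comparing node->cg against cg
 */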

static void reset_proc_stat_node(struct cg_proc_stat *node, struct cpuacct_usage *usage, int cpu_count)
{
	int i;

	lxcfs_debug("Resetting stat node for %s\n", node->cg);
	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);

	for (i = 0; i < cpu_count; i++) {
		node->view[i].user = 0;
		node->view[i].system = 0;
		node->view[i].idle = 0;
	}

	node->cpu_count = cpu_count;
}

static bool expand_proc_stat_node(struct cg_proc_stat *node, int cpu_count)
{
	__do_free struct cpuacct_usage *new_usage = NULL, *new_view = NULL;

	/* Allocate new memory */
	new_usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!new_usage)
		return false;

	new_view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!new_view)
		return false;

	/* Copy existing data & initialize new elements */
	for (int i = 0; i < cpu_count; i++) {
		if (i < node->cpu_count) {
			new_usage[i].user = node->usage[i].user;
			new_usage[i].system = node->usage[i].system;
			new_usage[i].idle = node->usage[i].idle;

			new_view[i].user = node->view[i].user;
			new_view[i].system = node->view[i].system;
			new_view[i].idle = node->view[i].idle;
		} else {
			new_usage[i].user = 0;
			new_usage[i].system = 0;
			new_usage[i].idle = 0;

			new_view[i].user = 0;
			new_view[i].system = 0;
			new_view[i].idle = 0;
		}
	}

	free(node->usage);
	node->usage = move_ptr(new_usage);

	free(node->view);
	node->view = move_ptr(new_view);
	node->cpu_count = cpu_count;

	return true;
}
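
/*
 * Memory-handling note for expand_proc_stat_node(): `__do_free` (from
 * memory_utils.h) frees the new buffers automatically if we bail out early,
 * while `move_ptr()` transfers ownership to the node and disarms that
 * automatic cleanup on success.
 */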

static void free_proc_stat_node(struct cg_proc_stat *node)
{
	pthread_mutex_destroy(&node->lock);
	free_disarm(node->cg);
	free_disarm(node->usage);
	free_disarm(node->view);
	free_disarm(node);
}

static struct cg_proc_stat *add_proc_stat_node(struct cg_proc_stat *new_node)
{
	int hash = calc_hash(new_node->cg) % CPUVIEW_HASH_SIZE;
	struct cg_proc_stat_head *head = proc_stat_history[hash];
	struct cg_proc_stat *node, *rv = new_node;

	pthread_rwlock_wrlock(&head->lock);

	if (!head->next) {
		head->next = new_node;
		goto out;
	}

	node = head->next;

	for (;;) {
		if (strcmp(node->cg, new_node->cg) == 0) {
			/* The node is already present, return it */
			free_proc_stat_node(new_node);
			rv = node;
			goto out;
		}

		if (node->next) {
			node = node->next;
			continue;
		}

		node->next = new_node;
		goto out;
	}

out:
	pthread_rwlock_unlock(&head->lock);
	return rv;
}
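
/*
 * Note: if a node for the same cgroup already exists, add_proc_stat_node()
 * frees the caller's new_node and returns the existing entry, so callers
 * must continue with the returned pointer rather than the one they passed
 * in.
 */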

static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
{
	struct cg_proc_stat *node;
	int i;

	node = malloc(sizeof(struct cg_proc_stat));
	if (!node)
		goto err;

	node->cg = NULL;
	node->usage = NULL;
	node->view = NULL;

	node->cg = malloc(strlen(cg) + 1);
	if (!node->cg)
		goto err;

	strcpy(node->cg, cg);

	node->usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!node->usage)
		goto err;

	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);

	node->view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!node->view)
		goto err;

	node->cpu_count = cpu_count;
	node->next = NULL;

	if (pthread_mutex_init(&node->lock, NULL) != 0) {
		lxcfs_error("%s\n", "Failed to initialize node lock");
		goto err;
	}

	for (i = 0; i < cpu_count; i++) {
		node->view[i].user = 0;
		node->view[i].system = 0;
		node->view[i].idle = 0;
	}

	return node;

err:
	if (node && node->cg)
		free(node->cg);
	if (node && node->usage)
		free(node->usage);
	if (node && node->view)
		free(node->view);
	if (node)
		free(node);

	return NULL;
}

static bool cgfs_param_exist(const char *controller, const char *cgroup,
			     const char *file)
{
	int ret, cfd;
	size_t len;
	char *fnam;

	cfd = get_cgroup_fd(controller);
	if (cfd < 0)
		return false;

	/* Make sure we pass a relative path to *at() family of functions.
	 * . + /cgroup + / + file + \0
	 */
	len = strlen(cgroup) + strlen(file) + 3;
	fnam = alloca(len);
	ret = snprintf(fnam, len, "%s%s/%s", dot_or_empty(cgroup), cgroup, file);
	if (ret < 0 || (size_t)ret >= len)
		return false;

	return (faccessat(cfd, fnam, F_OK, 0) == 0);
}
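
/*
 * For example, with controller "cpu", cgroup "/lxc/c1" and file "cpu.shares",
 * cgfs_param_exist() builds the relative path "./lxc/c1/cpu.shares"
 * (dot_or_empty() supplies the leading dot for absolute cgroup paths) and
 * probes it with faccessat() relative to the controller's directory fd.
 */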

static struct cg_proc_stat *prune_proc_stat_list(struct cg_proc_stat *node)
{
	struct cg_proc_stat *first = NULL, *prev, *tmp;

	for (prev = NULL; node; ) {
		if (!cgfs_param_exist("cpu", node->cg, "cpu.shares")) {
			tmp = node;
			lxcfs_debug("Removing stat node for %s\n", node->cg);

			if (prev)
				prev->next = node->next;
			else
				first = node->next;

			node = node->next;
			free_proc_stat_node(tmp);
		} else {
			if (!first)
				first = node;
			prev = node;
			node = node->next;
		}
	}

	return first;
}

#define PROC_STAT_PRUNE_INTERVAL 10
static void prune_proc_stat_history(void)
{
	int i;
	time_t now = time(NULL);

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		pthread_rwlock_wrlock(&proc_stat_history[i]->lock);

		if ((proc_stat_history[i]->lastcheck + PROC_STAT_PRUNE_INTERVAL) > now) {
			pthread_rwlock_unlock(&proc_stat_history[i]->lock);
			return;
		}

		if (proc_stat_history[i]->next) {
			proc_stat_history[i]->next = prune_proc_stat_list(proc_stat_history[i]->next);
			proc_stat_history[i]->lastcheck = now;
		}

		pthread_rwlock_unlock(&proc_stat_history[i]->lock);
	}
}
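
/*
 * Pruning is opportunistic: it runs from find_proc_stat_node() after every
 * lookup, each bucket is rescanned at most once per PROC_STAT_PRUNE_INTERVAL
 * seconds, and the scan stops at the first bucket whose interval has not yet
 * expired. A node is dropped once its cgroup no longer exposes "cpu.shares".
 */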

static struct cg_proc_stat *find_proc_stat_node(struct cg_proc_stat_head *head,
						const char *cg)
{
	struct cg_proc_stat *node;

	pthread_rwlock_rdlock(&head->lock);

	if (!head->next) {
		pthread_rwlock_unlock(&head->lock);
		return NULL;
	}

	node = head->next;

	do {
		if (strcmp(cg, node->cg) == 0)
			goto out;
	} while ((node = node->next));

	node = NULL;

out:
	pthread_rwlock_unlock(&head->lock);
	prune_proc_stat_history();
	return node;
}

static struct cg_proc_stat *find_or_create_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
{
	int hash = calc_hash(cg) % CPUVIEW_HASH_SIZE;
	struct cg_proc_stat_head *head = proc_stat_history[hash];
	struct cg_proc_stat *node;

	node = find_proc_stat_node(head, cg);

	if (!node) {
		node = new_proc_stat_node(usage, cpu_count, cg);
		if (!node)
			return NULL;

		node = add_proc_stat_node(node);
		lxcfs_debug("New stat node (%d) for %s\n", cpu_count, cg);
	}

	pthread_mutex_lock(&node->lock);

	/* If additional CPUs on the host have been enabled, CPU usage counter
	 * arrays have to be expanded */
	if (node->cpu_count < cpu_count) {
		lxcfs_debug("Expanding stat node %d->%d for %s\n",
			    node->cpu_count, cpu_count, cg);

		if (!expand_proc_stat_node(node, cpu_count)) {
			pthread_mutex_unlock(&node->lock);
			lxcfs_debug("Unable to expand stat node %d->%d for %s\n",
				    node->cpu_count, cpu_count, cg);
			return NULL;
		}
	}

	return node;
}
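
/*
 * Note: on success the node is returned with node->lock held; it is the
 * caller's responsibility (see cpuview_proc_stat()) to unlock it once the
 * usage and view arrays have been updated.
 */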

static void add_cpu_usage(uint64_t *surplus, struct cpuacct_usage *usage,
			  uint64_t *counter, uint64_t threshold)
{
	uint64_t free_space, to_add;

	free_space = threshold - usage->user - usage->system;

	if (free_space > usage->idle)
		free_space = usage->idle;

	to_add = free_space > *surplus ? *surplus : free_space;

	*counter += to_add;
	usage->idle -= to_add;
	*surplus -= to_add;
}
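
/*
 * Worked example for add_cpu_usage(), with made-up numbers: given
 * threshold = 100, usage = { .user = 50, .system = 20, .idle = 40 } and
 * *surplus = 35, free_space is 100 - 50 - 20 = 30, which is below idle, so
 * to_add = min(30, 35) = 30. The counter grows by 30 ticks, idle drops to 10
 * and 5 ticks of surplus remain for the next CPU.
 */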

static uint64_t diff_cpu_usage(struct cpuacct_usage *older,
			       struct cpuacct_usage *newer,
			       struct cpuacct_usage *diff, int cpu_count)
{
	int i;
	uint64_t sum = 0;

	for (i = 0; i < cpu_count; i++) {
		if (!newer[i].online)
			continue;

		/* When cpuset is changed on the fly, the CPUs might get
		 * reordered. We could either reset all counters, or check
		 * that the subtractions below will return expected results.
		 */
		if (newer[i].user > older[i].user)
			diff[i].user = newer[i].user - older[i].user;
		else
			diff[i].user = 0;

		if (newer[i].system > older[i].system)
			diff[i].system = newer[i].system - older[i].system;
		else
			diff[i].system = 0;

		if (newer[i].idle > older[i].idle)
			diff[i].idle = newer[i].idle - older[i].idle;
		else
			diff[i].idle = 0;

		sum += diff[i].user;
		sum += diff[i].system;
		sum += diff[i].idle;
	}

	return sum;
}

/*
 * Read a cgroup CPU quota parameter, either `cpu.cfs_quota_us` or
 * `cpu.cfs_period_us`, depending on `param`. The parameter value is
 * returned through `value`.
 */
static bool read_cpu_cfs_param(const char *cg, const char *param, int64_t *value)
{
	__do_free char *str = NULL;
	char file[11 + 6 + 1]; /* cpu.cfs__us + quota/period + \0 */

	snprintf(file, sizeof(file), "cpu.cfs_%s_us", param);

	if (!cgroup_ops->get(cgroup_ops, "cpu", cg, file, &str))
		return false;

	if (sscanf(str, "%" PRId64, value) != 1)
		return false;

	return true;
}
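
/*
 * Usage: read_cpu_cfs_param(cg, "quota", &q) reads "cpu.cfs_quota_us" and
 * read_cpu_cfs_param(cg, "period", &p) reads "cpu.cfs_period_us". A quota of
 * -1 (no limit) is passed through unchanged; the callers below treat any
 * value <= 0 as "no quota set".
 */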

/*
 * Return the exact number of visible CPUs based on CPU quotas.
 * If there is no quota set, zero is returned.
 */
static double exact_cpu_count(const char *cg)
{
	double rv;
	int nprocs;
	int64_t cfs_quota, cfs_period;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
		return 0;

	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	if (cfs_quota <= 0 || cfs_period <= 0)
		return 0;

	rv = (double)cfs_quota / (double)cfs_period;

	nprocs = get_nprocs();

	if (rv > nprocs)
		rv = nprocs;

	return rv;
}
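
/*
 * Example: cfs_quota = 150000 and cfs_period = 100000 yield 1.5 visible
 * CPUs, capped at the host's CPU count as reported by get_nprocs().
 */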

/*
 * Return the maximum number of visible CPUs based on CPU quotas and the
 * cgroup's cpuset. If neither limit is set, zero is returned.
 */
int max_cpu_count(const char *cg)
{
	__do_free char *cpuset = NULL;
	int rv, nprocs;
	int64_t cfs_quota, cfs_period;
	int nr_cpus_in_cpuset = 0;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
		return 0;

	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	cpuset = get_cpuset(cg);
	if (cpuset)
		nr_cpus_in_cpuset = cpu_number_in_cpuset(cpuset);

	if (cfs_quota <= 0 || cfs_period <= 0) {
		if (nr_cpus_in_cpuset > 0)
			return nr_cpus_in_cpuset;

		return 0;
	}

	rv = cfs_quota / cfs_period;

	/* In case quota/period does not yield a whole number, add one CPU for
	 * the remainder.
	 */
	if ((cfs_quota % cfs_period) > 0)
		rv += 1;

	nprocs = get_nprocs();

	if (rv > nprocs)
		rv = nprocs;

	/* Use the minimum of the CPU quota and the cpuset. */
	if (nr_cpus_in_cpuset > 0 && nr_cpus_in_cpuset < rv)
		rv = nr_cpus_in_cpuset;

	return rv;
}
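
/*
 * Example: with cfs_quota = 150000 and cfs_period = 100000 the quotient is 1
 * with a remainder, so max_cpu_count() rounds up to 2; if the cgroup's
 * cpuset contains only one CPU (say "3"), the cpuset wins and 1 is returned.
 */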

int cpuview_proc_stat(const char *cg, const char *cpuset,
		      struct cpuacct_usage *cg_cpu_usage, int cg_cpu_usage_size,
		      FILE *f, char *buf, size_t buf_size)
{
	__do_free char *line = NULL;
	__do_free struct cpuacct_usage *diff = NULL;
	size_t linelen = 0, total_len = 0;
	ssize_t l;
	int curcpu = -1; /* cpu numbering starts at 0 */
	int physcpu, i;
	int max_cpus = max_cpu_count(cg), cpu_cnt = 0;
	uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0,
		 softirq = 0, steal = 0, guest = 0, guest_nice = 0;
	uint64_t user_sum = 0, system_sum = 0, idle_sum = 0;
	uint64_t user_surplus = 0, system_surplus = 0;
	uint64_t total_sum, threshold;
	struct cg_proc_stat *stat_node;
	int nprocs = get_nprocs_conf();

	if (cg_cpu_usage_size < nprocs)
		nprocs = cg_cpu_usage_size;

	/* Read all CPU stats and stop when we've encountered other lines */
	while (getline(&line, &linelen, f) != -1) {
		int ret;
		char cpu_char[10]; /* That's a lot of cores */
		uint64_t all_used, cg_used;

		if (strlen(line) == 0)
			continue;

		/* not a ^cpuN line containing a number N */
		if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1)
			break;

		if (sscanf(cpu_char, "%d", &physcpu) != 1)
			continue;

		if (physcpu >= cg_cpu_usage_size)
			continue;

		curcpu++;
		cpu_cnt++;

		if (!cpu_in_cpuset(physcpu, cpuset)) {
			for (i = curcpu; i <= physcpu; i++)
				cg_cpu_usage[i].online = false;
			continue;
		}

		if (curcpu < physcpu) {
			/* Some CPUs may be disabled */
			for (i = curcpu; i < physcpu; i++)
				cg_cpu_usage[i].online = false;

			curcpu = physcpu;
		}

		cg_cpu_usage[curcpu].online = true;

		ret = sscanf(line, "%*s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64,
			     &user,
			     &nice,
			     &system,
			     &idle,
			     &iowait,
			     &irq,
			     &softirq,
			     &steal,
			     &guest,
			     &guest_nice);

		if (ret != 10)
			continue;

		all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
		cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system;

		if (all_used >= cg_used) {
			cg_cpu_usage[curcpu].idle = idle + (all_used - cg_used);
		} else {
			lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time",
				    curcpu, cg, all_used, cg_used);
			cg_cpu_usage[curcpu].idle = idle;
		}
	}

	/* Cannot use more CPUs than are available due to cpuset */
	if (max_cpus > cpu_cnt)
		max_cpus = cpu_cnt;

	stat_node = find_or_create_proc_stat_node(cg_cpu_usage, nprocs, cg);
	if (!stat_node) {
		lxcfs_error("unable to find/create stat node for %s\n", cg);
		return 0;
	}

	diff = malloc(sizeof(struct cpuacct_usage) * nprocs);
	if (!diff) {
		pthread_mutex_unlock(&stat_node->lock);
		return 0;
	}

	/*
	 * If the new values are LOWER than values stored in memory, it means
	 * the cgroup has been reset/recreated and we should reset too.
	 */
	for (curcpu = 0; curcpu < nprocs; curcpu++) {
		if (!cg_cpu_usage[curcpu].online)
			continue;

		if (cg_cpu_usage[curcpu].user < stat_node->usage[curcpu].user)
			reset_proc_stat_node(stat_node, cg_cpu_usage, nprocs);

		break;
	}

	total_sum = diff_cpu_usage(stat_node->usage, cg_cpu_usage, diff, nprocs);

	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
		stat_node->usage[curcpu].online = cg_cpu_usage[curcpu].online;

		if (!stat_node->usage[curcpu].online)
			continue;

		i++;

		stat_node->usage[curcpu].user += diff[curcpu].user;
		stat_node->usage[curcpu].system += diff[curcpu].system;
		stat_node->usage[curcpu].idle += diff[curcpu].idle;

		if (max_cpus > 0 && i >= max_cpus) {
			user_surplus += diff[curcpu].user;
			system_surplus += diff[curcpu].system;
		}
	}

	/* Calculate usage counters of visible CPUs */
	if (max_cpus > 0) {
		uint64_t diff_user = 0;
		uint64_t diff_system = 0;
		uint64_t diff_idle = 0;
		uint64_t max_diff_idle = 0;
		uint64_t max_diff_idle_index = 0;
		double exact_cpus;

		/* threshold = maximum usage per cpu, including idle */
		threshold = total_sum / cpu_cnt * max_cpus;

		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			i++;

			if (i == max_cpus)
				break;

			if (diff[curcpu].user + diff[curcpu].system >= threshold)
				continue;

			/* Add user */
			add_cpu_usage(&user_surplus, &diff[curcpu],
				      &diff[curcpu].user, threshold);

			if (diff[curcpu].user + diff[curcpu].system >= threshold)
				continue;

			/* If there is still room, add system */
			add_cpu_usage(&system_surplus, &diff[curcpu],
				      &diff[curcpu].system, threshold);
		}

		if (user_surplus > 0)
			lxcfs_debug("leftover user: %lu for %s\n", user_surplus, cg);
		if (system_surplus > 0)
			lxcfs_debug("leftover system: %lu for %s\n", system_surplus, cg);

		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			i++;

			if (i == max_cpus)
				break;

			stat_node->view[curcpu].user += diff[curcpu].user;
			stat_node->view[curcpu].system += diff[curcpu].system;
			stat_node->view[curcpu].idle += diff[curcpu].idle;

			user_sum += stat_node->view[curcpu].user;
			system_sum += stat_node->view[curcpu].system;
			idle_sum += stat_node->view[curcpu].idle;

			diff_user += diff[curcpu].user;
			diff_system += diff[curcpu].system;
			diff_idle += diff[curcpu].idle;
			if (diff[curcpu].idle > max_diff_idle) {
				max_diff_idle = diff[curcpu].idle;
				max_diff_idle_index = curcpu;
			}

			lxcfs_v("curcpu: %d, diff_user: %lu, diff_system: %lu, diff_idle: %lu\n", curcpu, diff[curcpu].user, diff[curcpu].system, diff[curcpu].idle);
		}
		lxcfs_v("total. diff_user: %lu, diff_system: %lu, diff_idle: %lu\n", diff_user, diff_system, diff_idle);

		/* Revise the cpu usage view to support the partial cpu case. */
		exact_cpus = exact_cpu_count(cg);
		if (exact_cpus < (double)max_cpus) {
			unsigned long delta = (unsigned long)((double)(diff_user + diff_system + diff_idle) * (1 - exact_cpus / (double)max_cpus));

			lxcfs_v("revising cpu usage view to match the exact cpu count [%f]\n", exact_cpus);
			lxcfs_v("delta: %lu\n", delta);
			lxcfs_v("idle_sum before: %lu\n", idle_sum);
			idle_sum = idle_sum > delta ? idle_sum - delta : 0;
			lxcfs_v("idle_sum after: %lu\n", idle_sum);

			curcpu = max_diff_idle_index;
			lxcfs_v("curcpu: %d, idle before: %lu\n", curcpu, stat_node->view[curcpu].idle);
			stat_node->view[curcpu].idle = stat_node->view[curcpu].idle > delta ? stat_node->view[curcpu].idle - delta : 0;
			lxcfs_v("curcpu: %d, idle after: %lu\n", curcpu, stat_node->view[curcpu].idle);
		}
	} else {
		for (curcpu = 0; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			stat_node->view[curcpu].user = stat_node->usage[curcpu].user;
			stat_node->view[curcpu].system = stat_node->usage[curcpu].system;
			stat_node->view[curcpu].idle = stat_node->usage[curcpu].idle;

			user_sum += stat_node->view[curcpu].user;
			system_sum += stat_node->view[curcpu].system;
			idle_sum += stat_node->view[curcpu].idle;
		}
	}

	/* Render the file */
	/* cpu-all */
	l = snprintf(buf, buf_size,
		     "cpu %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
		     user_sum, system_sum, idle_sum);
	lxcfs_v("cpu-all: %s\n", buf);

	if (l < 0) {
		perror("Error writing to cache");
		total_len = 0;
		goto out;
	}
	if ((size_t)l >= buf_size) {
		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
		total_len = 0;
		goto out;
	}

	buf += l;
	buf_size -= l;
	total_len += l;

	/* Render visible CPUs */
	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
		if (!stat_node->usage[curcpu].online)
			continue;

		i++;

		if (max_cpus > 0 && i == max_cpus)
			break;

		l = snprintf(buf, buf_size, "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
			     i,
			     stat_node->view[curcpu].user,
			     stat_node->view[curcpu].system,
			     stat_node->view[curcpu].idle);
		lxcfs_v("cpu: %s\n", buf);

		if (l < 0) {
			perror("Error writing to cache");
			total_len = 0;
			goto out;
		}
		if ((size_t)l >= buf_size) {
			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
			total_len = 0;
			goto out;
		}

		buf += l;
		buf_size -= l;
		total_len += l;
	}

	/* Pass the rest of /proc/stat, starting with the last line read */
	l = snprintf(buf, buf_size, "%s", line);

	if (l < 0) {
		perror("Error writing to cache");
		total_len = 0;
		goto out;
	}
	if ((size_t)l >= buf_size) {
		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
		total_len = 0;
		goto out;
	}

	buf += l;
	buf_size -= l;
	total_len += l;

	/* Pass the rest of the host's /proc/stat */
	while (getline(&line, &linelen, f) != -1) {
		l = snprintf(buf, buf_size, "%s", line);
		if (l < 0) {
			perror("Error writing to cache");
			total_len = 0;
			goto out;
		}
		if ((size_t)l >= buf_size) {
			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
			total_len = 0;
			goto out;
		}
		buf += l;
		buf_size -= l;
		total_len += l;
	}

out:
	pthread_mutex_unlock(&stat_node->lock);
	return total_len;
}
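
/*
 * The rendered view mimics /proc/stat but folds everything except user,
 * system and idle into zeroes, e.g. for a container limited to two CPUs
 * (the numbers below are made up; only the column layout matters):
 *
 *	cpu 360 0 123 3000 0 0 0 0 0 0
 *	cpu0 180 0 62 1500 0 0 0 0 0 0
 *	cpu1 180 0 61 1500 0 0 0 0 0 0
 *	<remainder of the host's /proc/stat>
 */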

/*
 * Check whether this is a '^processor' line in /proc/cpuinfo.
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) == 1)
		return true;
	return false;
}

static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) != 1)
		return false;
	return cpu_in_cpuset(cpu, cpuset);
}
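
/*
 * Example lines matched above: "processor	: 3" from x86 /proc/cpuinfo
 * (the space in the sscanf() format also skips the tab) and
 * "processor 3: version = ..." on s390x, which is handled separately in
 * proc_cpuinfo_read().
 */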

int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
		      struct fuse_file_info *fi)
{
	__do_free char *cg = NULL, *cpuset = NULL, *line = NULL;
	__do_fclose FILE *f = NULL;
	struct fuse_context *fc = fuse_get_context();
	struct file_info *d = INTTYPE_TO_PTR(fi->fh);
	size_t linelen = 0, total_len = 0;
	bool am_printing = false, firstline = true, is_s390x = false;
	int curcpu = -1, cpu, max_cpus = 0;
	bool use_view;
	char *cache = d->buf;
	size_t cache_size = d->buflen;

	if (offset) {
		int left;

		if (offset > d->size)
			return -EINVAL;

		if (!d->cached)
			return 0;

		left = d->size - offset;
		total_len = left > size ? size : left;
		memcpy(buf, cache + offset, total_len);

		return total_len;
	}

	pid_t initpid = lookup_initpid_in_store(fc->pid);
	if (initpid <= 1 || is_shared_pidns(initpid))
		initpid = fc->pid;
	cg = get_pid_cgroup(initpid, "cpuset");
	if (!cg)
		return read_file_fuse("proc/cpuinfo", buf, size, d);
	prune_init_slice(cg);

	cpuset = get_cpuset(cg);
	if (!cpuset)
		return 0;

	use_view = cgroup_ops->can_use_cpuview(cgroup_ops);
	if (use_view)
		max_cpus = max_cpu_count(cg);

	f = fopen("/proc/cpuinfo", "r");
	if (!f)
		return 0;

	while (getline(&line, &linelen, f) != -1) {
		ssize_t l;
		if (firstline) {
			firstline = false;
			if (strstr(line, "IBM/S390") != NULL) {
				is_s390x = true;
				am_printing = true;
				continue;
			}
		}
		if (strncmp(line, "# processors", 12) == 0)
			continue;
		if (is_processor_line(line)) {
			if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus)
				break;
			am_printing = cpuline_in_cpuset(line, cpuset);
			if (am_printing) {
				curcpu++;
				l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
				if (l < 0) {
					perror("Error writing to cache");
					return 0;
				}
				if (l >= cache_size) {
					lxcfs_error("%s\n", "Internal error: truncated write to cache.");
					return 0;
				}
				cache += l;
				cache_size -= l;
				total_len += l;
			}
			continue;
		} else if (is_s390x && sscanf(line, "processor %d:", &cpu) == 1) {
			char *p;

			if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus)
				break;
			if (!cpu_in_cpuset(cpu, cpuset))
				continue;

			curcpu++;
			p = strchr(line, ':');
			if (!p || !*p)
				return 0;
			p++;

			l = snprintf(cache, cache_size, "processor %d:%s", curcpu, p);
			if (l < 0) {
				perror("Error writing to cache");
				return 0;
			}
			if (l >= cache_size) {
				lxcfs_error("%s\n", "Internal error: truncated write to cache.");
				return 0;
			}
			cache += l;
			cache_size -= l;
			total_len += l;
			continue;
		}
		if (am_printing) {
			l = snprintf(cache, cache_size, "%s", line);
			if (l < 0) {
				perror("Error writing to cache");
				return 0;
			}
			if (l >= cache_size) {
				lxcfs_error("%s\n", "Internal error: truncated write to cache.");
				return 0;
			}
			cache += l;
			cache_size -= l;
			total_len += l;
		}
	}

	if (is_s390x) {
		__do_free char *origcache = d->buf;
		ssize_t l;

		d->buf = malloc(d->buflen);
		if (!d->buf) {
			d->buf = move_ptr(origcache);
			return 0;
		}

		cache = d->buf;
		cache_size = d->buflen;
		total_len = 0;
		l = snprintf(cache, cache_size, "vendor_id : IBM/S390\n");
		if (l < 0 || l >= cache_size)
			return 0;

		cache_size -= l;
		cache += l;
		total_len += l;
		l = snprintf(cache, cache_size, "# processors : %d\n", curcpu + 1);
		if (l < 0 || l >= cache_size)
			return 0;

		cache_size -= l;
		cache += l;
		total_len += l;
		l = snprintf(cache, cache_size, "%s", origcache);
		if (l < 0 || l >= cache_size)
			return 0;
		total_len += l;
	}

	d->cached = 1;
	d->size = total_len;
	if (total_len > size)
		total_len = size;

	/* read from offset 0 */
	memcpy(buf, d->buf, total_len);
	return total_len;
}

/*
 * Returns 0 on success.
 * It is the caller's responsibility to free `return_usage`, unless this
 * function returns an error.
 */
int read_cpuacct_usage_all(char *cg, char *cpuset,
			   struct cpuacct_usage **return_usage, int *size)
{
	__do_free char *usage_str = NULL;
	__do_free struct cpuacct_usage *cpu_usage = NULL;
	int cpucount = get_nprocs_conf();
	int i = 0, j = 0, read_pos = 0, read_cnt = 0;
	int ret;
	int cg_cpu;
	uint64_t cg_user, cg_system;
	int64_t ticks_per_sec;

	ticks_per_sec = sysconf(_SC_CLK_TCK);

	if (ticks_per_sec < 0 && errno == EINVAL) {
		lxcfs_v("%s\n",
			"read_cpuacct_usage_all failed to determine number of clock ticks in a second");
		return -1;
	}

	cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount);
	if (!cpu_usage)
		return -ENOMEM;

	memset(cpu_usage, 0, sizeof(struct cpuacct_usage) * cpucount);
	if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_all", &usage_str)) {
		char *data = NULL;
		size_t sz = 0, asz = 0;

		/* Read cpuacct.usage_percpu instead. */
		lxcfs_v("failed to read cpuacct.usage_all. reading cpuacct.usage_percpu instead\n%s", "");
		if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_percpu", &usage_str))
			return -1;
		lxcfs_v("usage_str: %s\n", usage_str);

		/* Convert cpuacct.usage_percpu into cpuacct.usage_all. */
		lxcfs_v("converting cpuacct.usage_percpu into cpuacct.usage_all\n%s", "");

		must_strcat(&data, &sz, &asz, "cpu user system\n");

		while (sscanf(usage_str + read_pos, "%" PRIu64 " %n", &cg_user, &read_cnt) > 0) {
			lxcfs_debug("i: %d, cg_user: %" PRIu64 ", read_pos: %d, read_cnt: %d\n", i, cg_user, read_pos, read_cnt);
			must_strcat(&data, &sz, &asz, "%d %" PRIu64 " 0\n", i, cg_user);
			i++;
			read_pos += read_cnt;
		}

		free(usage_str);
		usage_str = data;
		/* Restart parsing from the beginning of the converted string. */
		read_pos = 0;

		lxcfs_v("usage_str: %s\n", usage_str);
	}

	if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) {
		lxcfs_error("read_cpuacct_usage_all reading first line from %s/cpuacct.usage_all failed.\n", cg);
		return -1;
	}

	read_pos += read_cnt;

	for (i = 0, j = 0; i < cpucount; i++) {
		ret = sscanf(usage_str + read_pos,
			     "%d %" PRIu64 " %" PRIu64 "\n%n", &cg_cpu,
			     &cg_user, &cg_system, &read_cnt);

		if (ret == EOF)
			break;

		if (ret != 3) {
			lxcfs_error("read_cpuacct_usage_all reading from %s/cpuacct.usage_all failed.\n", cg);
			return -1;
		}

		read_pos += read_cnt;

		/* Convert the time from nanoseconds to USER_HZ */
		cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec;
		cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec;
		j++;
	}

	*return_usage = move_ptr(cpu_usage);
	*size = cpucount;
	return 0;
}
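
/*
 * Example of the nanosecond conversion above: with the common _SC_CLK_TCK of
 * 100, a cpuacct.usage_all line "0 2500000000 1000000000" becomes
 * user = 250 and system = 100 USER_HZ ticks for CPU 0.
 */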

static bool cpuview_init_head(struct cg_proc_stat_head **head)
{
	*head = malloc(sizeof(struct cg_proc_stat_head));
	if (!(*head)) {
		lxcfs_error("%s\n", strerror(errno));
		return false;
	}

	(*head)->lastcheck = time(NULL);
	(*head)->next = NULL;

	if (pthread_rwlock_init(&(*head)->lock, NULL) != 0) {
		lxcfs_error("%s\n", "Failed to initialize list lock");
		free_disarm(*head);
		return false;
	}

	return true;
}

bool init_cpuview(void)
{
	int i;

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++)
		proc_stat_history[i] = NULL;

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		if (!cpuview_init_head(&proc_stat_history[i]))
			goto err;
	}

	return true;

err:
	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		if (proc_stat_history[i])
			free_disarm(proc_stat_history[i]);
	}

	return false;
}

static void cpuview_free_head(struct cg_proc_stat_head *head)
{
	struct cg_proc_stat *node, *tmp;

	if (head->next) {
		node = head->next;

		for (;;) {
			tmp = node;
			node = node->next;
			free_proc_stat_node(tmp);

			if (!node)
				break;
		}
	}

	pthread_rwlock_destroy(&head->lock);
	free_disarm(head);
}

void free_cpuview(void)
{
	for (int i = 0; i < CPUVIEW_HASH_SIZE; i++)
		if (proc_stat_history[i])
			cpuview_free_head(proc_stat_history[i]);
}