/* SPDX-License-Identifier: LGPL-2.1-or-later */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#ifndef FUSE_USE_VERSION
#define FUSE_USE_VERSION 26
#endif

#define _FILE_OFFSET_BITS 64

#define __STDC_FORMAT_MACROS
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <fuse.h>
#include <inttypes.h>
#include <libgen.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <wait.h>
#include <linux/magic.h>
#include <linux/sched.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <sys/vfs.h>

#include "bindings.h"
#include "config.h"
#include "cgroup_fuse.h"
#include "cpuset_parse.h"
#include "cgroups/cgroup.h"
#include "cgroups/cgroup_utils.h"
#include "memory_utils.h"
#include "proc_loadavg.h"
#include "utils.h"

/* Data for CPU view */
struct cg_proc_stat {
	char *cg;
	struct cpuacct_usage *usage; // Real usage as read from the host's /proc/stat
	struct cpuacct_usage *view; // Usage stats reported to the container
	int cpu_count;
	pthread_mutex_t lock; // For node manipulation
	struct cg_proc_stat *next;
};

struct cg_proc_stat_head {
	struct cg_proc_stat *next;
	time_t lastcheck;

	/*
	 * For access to the list. Reading can be parallel, pruning is exclusive.
	 */
	pthread_rwlock_t lock;
};

#define CPUVIEW_HASH_SIZE 100
static struct cg_proc_stat_head *proc_stat_history[CPUVIEW_HASH_SIZE];
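/*
 * Nodes are bucketed by cgroup name: calc_hash(cg) % CPUVIEW_HASH_SIZE picks
 * the bucket, and each bucket holds a singly-linked list protected by
 * head->lock. Illustrative lookup (the cgroup path is a made-up example):
 *
 *	int hash = calc_hash("/lxc/c1") % CPUVIEW_HASH_SIZE;
 *	struct cg_proc_stat *node =
 *		find_proc_stat_node(proc_stat_history[hash], "/lxc/c1");
 */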

static void reset_proc_stat_node(struct cg_proc_stat *node, struct cpuacct_usage *usage, int cpu_count)
{
	int i;

	lxcfs_debug("Resetting stat node for %s\n", node->cg);
	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);

	for (i = 0; i < cpu_count; i++) {
		node->view[i].user = 0;
		node->view[i].system = 0;
		node->view[i].idle = 0;
	}

	node->cpu_count = cpu_count;
}

static bool expand_proc_stat_node(struct cg_proc_stat *node, int cpu_count)
{
	__do_free struct cpuacct_usage *new_usage = NULL, *new_view = NULL;

	/* Allocate new memory */
	new_usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!new_usage)
		return false;

	new_view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!new_view)
		return false;

	/* Copy existing data & initialize new elements */
	for (int i = 0; i < cpu_count; i++) {
		if (i < node->cpu_count) {
			new_usage[i].user = node->usage[i].user;
			new_usage[i].system = node->usage[i].system;
			new_usage[i].idle = node->usage[i].idle;

			new_view[i].user = node->view[i].user;
			new_view[i].system = node->view[i].system;
			new_view[i].idle = node->view[i].idle;
		} else {
			new_usage[i].user = 0;
			new_usage[i].system = 0;
			new_usage[i].idle = 0;

			new_view[i].user = 0;
			new_view[i].system = 0;
			new_view[i].idle = 0;
		}
	}

	free(node->usage);
	node->usage = move_ptr(new_usage);

	free(node->view);
	node->view = move_ptr(new_view);
	node->cpu_count = cpu_count;

	return true;
}
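
/*
 * Note on ownership above: new_usage and new_view carry the __do_free
 * cleanup attribute, so they are freed automatically on the early-return
 * error paths. move_ptr() hands the buffer over to the node and nulls the
 * local pointer, which disarms that cleanup:
 *
 *	node->usage = move_ptr(new_usage);	// new_usage is NULL afterwards
 */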

static void free_proc_stat_node(struct cg_proc_stat *node)
{
	pthread_mutex_destroy(&node->lock);
	free_disarm(node->cg);
	free_disarm(node->usage);
	free_disarm(node->view);
	free_disarm(node);
}

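/*
 * Register a node in the bucket for its cgroup. The write lock is taken
 * before the list is walked, so two threads racing to add the same cgroup
 * cannot both insert: the loser finds the winner's node, frees its own
 * candidate, and the existing node is returned.
 */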
static struct cg_proc_stat *add_proc_stat_node(struct cg_proc_stat *new_node)
{
	int hash = calc_hash(new_node->cg) % CPUVIEW_HASH_SIZE;
	struct cg_proc_stat_head *head = proc_stat_history[hash];
	struct cg_proc_stat *node, *rv = new_node;

	pthread_rwlock_wrlock(&head->lock);

	if (!head->next) {
		head->next = new_node;
		goto out;
	}

	node = head->next;

	for (;;) {
		if (strcmp(node->cg, new_node->cg) == 0) {
			/* The node is already present, return it */
			free_proc_stat_node(new_node);
			rv = node;
			goto out;
		}

		if (node->next) {
			node = node->next;
			continue;
		}

		node->next = new_node;
		goto out;
	}

out:
	pthread_rwlock_unlock(&head->lock);
	return rv;
}

static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
{
	struct cg_proc_stat *node;
	int i;

	node = malloc(sizeof(struct cg_proc_stat));
	if (!node)
		goto err;

	node->cg = NULL;
	node->usage = NULL;
	node->view = NULL;

	node->cg = strdup(cg);
	if (!node->cg)
		goto err;

	node->usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!node->usage)
		goto err;

	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);

	node->view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!node->view)
		goto err;

	node->cpu_count = cpu_count;
	node->next = NULL;

	if (pthread_mutex_init(&node->lock, NULL) != 0) {
		lxcfs_error("%s\n", "Failed to initialize node lock");
		goto err;
	}

	for (i = 0; i < cpu_count; i++) {
		node->view[i].user = 0;
		node->view[i].system = 0;
		node->view[i].idle = 0;
	}

	return node;

err:
	/* free(NULL) is a no-op, so the partially initialized node can be
	 * torn down unconditionally.
	 */
	if (node) {
		free(node->cg);
		free(node->usage);
		free(node->view);
		free(node);
	}

	return NULL;
}

static bool cgfs_param_exist(const char *controller, const char *cgroup,
			     const char *file)
{
	int ret, cfd;
	size_t len;
	char *fnam;

	cfd = get_cgroup_fd(controller);
	if (cfd < 0)
		return false;

	/* Make sure we pass a relative path to the *at() family of functions.
	 * . + /cgroup + / + file + \0
	 */
	len = strlen(cgroup) + strlen(file) + 3;
	fnam = alloca(len);
	ret = snprintf(fnam, len, "%s%s/%s", dot_or_empty(cgroup), cgroup, file);
	if (ret < 0 || (size_t)ret >= len)
		return false;

	return (faccessat(cfd, fnam, F_OK, 0) == 0);
}

static struct cg_proc_stat *prune_proc_stat_list(struct cg_proc_stat *node)
{
	struct cg_proc_stat *first = NULL, *prev, *tmp;

	for (prev = NULL; node; ) {
		if (!cgfs_param_exist("cpu", node->cg, "cpu.shares")) {
			tmp = node;
			lxcfs_debug("Removing stat node for %s\n", node->cg);

			if (prev)
				prev->next = node->next;
			else
				first = node->next;

			node = node->next;
			free_proc_stat_node(tmp);
		} else {
			if (!first)
				first = node;
			prev = node;
			node = node->next;
		}
	}

	return first;
}

#define PROC_STAT_PRUNE_INTERVAL 10
static void prune_proc_stat_history(void)
{
	int i;
	time_t now = time(NULL);

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		pthread_rwlock_wrlock(&proc_stat_history[i]->lock);

		if ((proc_stat_history[i]->lastcheck + PROC_STAT_PRUNE_INTERVAL) > now) {
			pthread_rwlock_unlock(&proc_stat_history[i]->lock);
			return;
		}

		if (proc_stat_history[i]->next) {
			proc_stat_history[i]->next = prune_proc_stat_list(proc_stat_history[i]->next);
			proc_stat_history[i]->lastcheck = now;
		}

		pthread_rwlock_unlock(&proc_stat_history[i]->lock);
	}
}
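
/*
 * Pruning is opportunistic: find_proc_stat_node() calls
 * prune_proc_stat_history() after every lookup, and the lastcheck stamp
 * limits the actual work to at most once every PROC_STAT_PRUNE_INTERVAL
 * seconds. A node is dropped once its cgroup's cpu.shares can no longer be
 * accessed, i.e. the cgroup has disappeared.
 */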

static struct cg_proc_stat *find_proc_stat_node(struct cg_proc_stat_head *head,
						const char *cg)
{
	struct cg_proc_stat *node;

	pthread_rwlock_rdlock(&head->lock);

	if (!head->next) {
		pthread_rwlock_unlock(&head->lock);
		return NULL;
	}

	node = head->next;

	do {
		if (strcmp(cg, node->cg) == 0)
			goto out;
	} while ((node = node->next));

	node = NULL;

out:
	pthread_rwlock_unlock(&head->lock);
	prune_proc_stat_history();
	return node;
}
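
/*
 * On success the returned node's mutex is held; the caller (here
 * cpuview_proc_stat()) is responsible for dropping it when done.
 */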
static struct cg_proc_stat *find_or_create_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
{
	int hash = calc_hash(cg) % CPUVIEW_HASH_SIZE;
	struct cg_proc_stat_head *head = proc_stat_history[hash];
	struct cg_proc_stat *node;

	node = find_proc_stat_node(head, cg);

	if (!node) {
		node = new_proc_stat_node(usage, cpu_count, cg);
		if (!node)
			return NULL;

		node = add_proc_stat_node(node);
		lxcfs_debug("New stat node (%d) for %s\n", cpu_count, cg);
	}

	pthread_mutex_lock(&node->lock);

	/* If additional CPUs on the host have been enabled, the CPU usage
	 * counter arrays have to be expanded.
	 */
	if (node->cpu_count < cpu_count) {
		lxcfs_debug("Expanding stat node %d->%d for %s\n",
			    node->cpu_count, cpu_count, cg);

		if (!expand_proc_stat_node(node, cpu_count)) {
			pthread_mutex_unlock(&node->lock);
			lxcfs_debug("Unable to expand stat node %d->%d for %s\n",
				    node->cpu_count, cpu_count, cg);
			return NULL;
		}
	}

	return node;
}

static void add_cpu_usage(unsigned long *surplus, struct cpuacct_usage *usage,
			  unsigned long *counter, unsigned long threshold)
{
	unsigned long free_space, to_add;

	free_space = threshold - usage->user - usage->system;

	if (free_space > usage->idle)
		free_space = usage->idle;

	to_add = free_space > *surplus ? *surplus : free_space;

	*counter += to_add;
	usage->idle -= to_add;
	*surplus -= to_add;
}
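
/*
 * Worked example: with threshold = 100, usage = {user 30, system 20,
 * idle 60} and *surplus = 70, free_space is 100 - 30 - 20 = 50 (already
 * below idle, so the idle cap does not bite) and to_add = min(50, 70) = 50:
 * the counter gains 50 ticks, idle drops to 10, and 20 ticks of surplus
 * remain for other CPUs.
 */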

static unsigned long diff_cpu_usage(struct cpuacct_usage *older,
				    struct cpuacct_usage *newer,
				    struct cpuacct_usage *diff, int cpu_count)
{
	int i;
	unsigned long sum = 0;

	for (i = 0; i < cpu_count; i++) {
		if (!newer[i].online)
			continue;

		/* When cpuset is changed on the fly, the CPUs might get
		 * reordered. We could either reset all counters, or check
		 * that the subtractions below will return expected results.
		 */
		if (newer[i].user > older[i].user)
			diff[i].user = newer[i].user - older[i].user;
		else
			diff[i].user = 0;

		if (newer[i].system > older[i].system)
			diff[i].system = newer[i].system - older[i].system;
		else
			diff[i].system = 0;

		if (newer[i].idle > older[i].idle)
			diff[i].idle = newer[i].idle - older[i].idle;
		else
			diff[i].idle = 0;

		sum += diff[i].user;
		sum += diff[i].system;
		sum += diff[i].idle;
	}

	return sum;
}

/*
 * Read a cgroup CPU quota parameter, either `cpu.cfs_quota_us` or
 * `cpu.cfs_period_us`, depending on `param`. The parameter value is
 * returned through `value`.
 */
static bool read_cpu_cfs_param(const char *cg, const char *param, int64_t *value)
{
	__do_free char *str = NULL;
	char file[11 + 6 + 1]; /* cpu.cfs__us + quota/period + \0 */

	snprintf(file, sizeof(file), "cpu.cfs_%s_us", param);

	if (!cgroup_ops->get(cgroup_ops, "cpu", cg, file, &str))
		return false;

	if (sscanf(str, "%" SCNd64, value) != 1)
		return false;

	return true;
}

/*
 * Return the exact number of visible CPUs based on CPU quotas.
 * If there is no quota set, zero is returned.
 */
static double exact_cpu_count(const char *cg)
{
	double rv;
	int nprocs;
	int64_t cfs_quota, cfs_period;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
		return 0;

	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	if (cfs_quota <= 0 || cfs_period <= 0)
		return 0;

	rv = (double)cfs_quota / (double)cfs_period;

	nprocs = get_nprocs();

	if (rv > nprocs)
		rv = nprocs;

	return rv;
}

/*
 * Return the maximum number of visible CPUs based on CPU quotas and the
 * cpuset. If neither limits the cgroup, zero is returned.
 */
int max_cpu_count(const char *cg)
{
	__do_free char *cpuset = NULL;
	int rv, nprocs;
	int64_t cfs_quota, cfs_period;
	int nr_cpus_in_cpuset = 0;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
		return 0;

	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	cpuset = get_cpuset(cg);
	if (cpuset)
		nr_cpus_in_cpuset = cpu_number_in_cpuset(cpuset);

	if (cfs_quota <= 0 || cfs_period <= 0) {
		if (nr_cpus_in_cpuset > 0)
			return nr_cpus_in_cpuset;

		return 0;
	}

	rv = cfs_quota / cfs_period;

	/* In case quota/period does not yield a whole number, add one CPU for
	 * the remainder.
	 */
	if ((cfs_quota % cfs_period) > 0)
		rv += 1;

	nprocs = get_nprocs();

	if (rv > nprocs)
		rv = nprocs;

	/* Use the minimum of the CPU quota and the cpuset limit. */
	if (nr_cpus_in_cpuset > 0 && nr_cpus_in_cpuset < rv)
		rv = nr_cpus_in_cpuset;

	return rv;
}
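
/*
 * Worked example: cfs_quota = 150000 and cfs_period = 100000 describe a
 * limit of 1.5 CPUs. exact_cpu_count() reports 1.5, while max_cpu_count()
 * rounds up to 2 visible CPUs (150000 / 100000 = 1, plus one for the 50000
 * remainder), subject to the cpuset and get_nprocs() caps.
 */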

int cpuview_proc_stat(const char *cg, const char *cpuset,
		      struct cpuacct_usage *cg_cpu_usage, int cg_cpu_usage_size,
		      FILE *f, char *buf, size_t buf_size)
{
	__do_free char *line = NULL;
	__do_free struct cpuacct_usage *diff = NULL;
	size_t linelen = 0, total_len = 0;
	ssize_t l;
	int curcpu = -1; /* cpu numbering starts at 0 */
	int physcpu, i;
	int max_cpus = max_cpu_count(cg), cpu_cnt = 0;
	unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0,
		      irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0;
	unsigned long user_sum = 0, system_sum = 0, idle_sum = 0;
	unsigned long user_surplus = 0, system_surplus = 0;
	unsigned long total_sum, threshold;
	struct cg_proc_stat *stat_node;
	int nprocs = get_nprocs_conf();

	if (cg_cpu_usage_size < nprocs)
		nprocs = cg_cpu_usage_size;

	/* Read all CPU stats and stop when we've encountered other lines */
	while (getline(&line, &linelen, f) != -1) {
		int ret;
		char cpu_char[10]; /* That's a lot of cores */
		uint64_t all_used, cg_used;

		if (strlen(line) == 0)
			continue;

		/* not a ^cpuN line containing a number N */
		if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1)
			break;

		if (sscanf(cpu_char, "%d", &physcpu) != 1)
			continue;

		if (physcpu >= cg_cpu_usage_size)
			continue;

		curcpu++;
		cpu_cnt++;

		if (!cpu_in_cpuset(physcpu, cpuset)) {
			for (i = curcpu; i <= physcpu; i++)
				cg_cpu_usage[i].online = false;
			continue;
		}

		if (curcpu < physcpu) {
			/* Some CPUs may be disabled */
			for (i = curcpu; i < physcpu; i++)
				cg_cpu_usage[i].online = false;

			curcpu = physcpu;
		}

		cg_cpu_usage[curcpu].online = true;

		ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
			     &user,
			     &nice,
			     &system,
			     &idle,
			     &iowait,
			     &irq,
			     &softirq,
			     &steal,
			     &guest,
			     &guest_nice);

		if (ret != 10)
			continue;

		all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
		cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system;

		if (all_used >= cg_used) {
			cg_cpu_usage[curcpu].idle = idle + (all_used - cg_used);
		} else {
			lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, "
				    "%" PRIu64 " in cpuacct.usage_all; unable to determine idle time\n",
				    curcpu, cg, all_used, cg_used);
			cg_cpu_usage[curcpu].idle = idle;
		}
	}

	/* Cannot use more CPUs than are available due to cpuset */
	if (max_cpus > cpu_cnt)
		max_cpus = cpu_cnt;

	stat_node = find_or_create_proc_stat_node(cg_cpu_usage, nprocs, cg);

	if (!stat_node) {
		lxcfs_error("unable to find/create stat node for %s\n", cg);
		return 0;
	}

	diff = malloc(sizeof(struct cpuacct_usage) * nprocs);
	if (!diff) {
		/* stat_node->lock is held on success; drop it before bailing. */
		pthread_mutex_unlock(&stat_node->lock);
		return 0;
	}

	/*
	 * If the new values are LOWER than values stored in memory, it means
	 * the cgroup has been reset/recreated and we should reset too.
	 */
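	/* Only the first online CPU is sampled; a reset is assumed to roll
	 * all per-CPU counters back at once, so one sample suffices.
	 */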
	for (curcpu = 0; curcpu < nprocs; curcpu++) {
		if (!cg_cpu_usage[curcpu].online)
			continue;

		if (cg_cpu_usage[curcpu].user < stat_node->usage[curcpu].user)
			reset_proc_stat_node(stat_node, cg_cpu_usage, nprocs);

		break;
	}

	total_sum = diff_cpu_usage(stat_node->usage, cg_cpu_usage, diff, nprocs);

	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
		stat_node->usage[curcpu].online = cg_cpu_usage[curcpu].online;

		if (!stat_node->usage[curcpu].online)
			continue;

		i++;

		stat_node->usage[curcpu].user += diff[curcpu].user;
		stat_node->usage[curcpu].system += diff[curcpu].system;
		stat_node->usage[curcpu].idle += diff[curcpu].idle;

		if (max_cpus > 0 && i >= max_cpus) {
			user_surplus += diff[curcpu].user;
			system_surplus += diff[curcpu].system;
		}
	}

	/* Calculate usage counters of visible CPUs */
	if (max_cpus > 0) {
		unsigned long diff_user = 0;
		unsigned long diff_system = 0;
		unsigned long diff_idle = 0;
		unsigned long max_diff_idle = 0;
		unsigned long max_diff_idle_index = 0;
		double exact_cpus;

		/* threshold = maximum usage per cpu, including idle */
		threshold = total_sum / cpu_cnt * max_cpus;
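		/*
		 * E.g. total_sum = 1000 ticks across cpu_cnt = 4 online CPUs
		 * with max_cpus = 2 yields threshold = 1000 / 4 * 2 = 500
		 * ticks per visible CPU (the integer division runs first).
		 */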

		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			i++;

			if (i == max_cpus)
				break;

			if (diff[curcpu].user + diff[curcpu].system >= threshold)
				continue;

			/* Add user */
			add_cpu_usage(&user_surplus, &diff[curcpu],
				      &diff[curcpu].user, threshold);

			if (diff[curcpu].user + diff[curcpu].system >= threshold)
				continue;

			/* If there is still room, add system */
			add_cpu_usage(&system_surplus, &diff[curcpu],
				      &diff[curcpu].system, threshold);
		}

		if (user_surplus > 0)
			lxcfs_debug("leftover user: %lu for %s\n", user_surplus, cg);
		if (system_surplus > 0)
			lxcfs_debug("leftover system: %lu for %s\n", system_surplus, cg);

		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			i++;

			if (i == max_cpus)
				break;

			stat_node->view[curcpu].user += diff[curcpu].user;
			stat_node->view[curcpu].system += diff[curcpu].system;
			stat_node->view[curcpu].idle += diff[curcpu].idle;

			user_sum += stat_node->view[curcpu].user;
			system_sum += stat_node->view[curcpu].system;
			idle_sum += stat_node->view[curcpu].idle;

			diff_user += diff[curcpu].user;
			diff_system += diff[curcpu].system;
			diff_idle += diff[curcpu].idle;
			if (diff[curcpu].idle > max_diff_idle) {
				max_diff_idle = diff[curcpu].idle;
				max_diff_idle_index = curcpu;
			}

			lxcfs_v("curcpu: %d, diff_user: %lu, diff_system: %lu, diff_idle: %lu\n", curcpu, diff[curcpu].user, diff[curcpu].system, diff[curcpu].idle);
		}
		lxcfs_v("total. diff_user: %lu, diff_system: %lu, diff_idle: %lu\n", diff_user, diff_system, diff_idle);

		/* Revise the cpu usage view to support the partial-cpu case. */
		exact_cpus = exact_cpu_count(cg);
		if (exact_cpus < (double)max_cpus) {
			unsigned long delta = (unsigned long)((double)(diff_user + diff_system + diff_idle) * (1 - exact_cpus / (double)max_cpus));

			lxcfs_v("revising cpu usage view to match the exact cpu count [%f]\n", exact_cpus);
			lxcfs_v("delta: %lu\n", delta);
			lxcfs_v("idle_sum before: %lu\n", idle_sum);
			idle_sum = idle_sum > delta ? idle_sum - delta : 0;
			lxcfs_v("idle_sum after: %lu\n", idle_sum);

			curcpu = max_diff_idle_index;
			lxcfs_v("curcpu: %d, idle before: %lu\n", curcpu, stat_node->view[curcpu].idle);
			stat_node->view[curcpu].idle = stat_node->view[curcpu].idle > delta ? stat_node->view[curcpu].idle - delta : 0;
			lxcfs_v("curcpu: %d, idle after: %lu\n", curcpu, stat_node->view[curcpu].idle);
		}
	} else {
		for (curcpu = 0; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			stat_node->view[curcpu].user = stat_node->usage[curcpu].user;
			stat_node->view[curcpu].system = stat_node->usage[curcpu].system;
			stat_node->view[curcpu].idle = stat_node->usage[curcpu].idle;

			user_sum += stat_node->view[curcpu].user;
			system_sum += stat_node->view[curcpu].system;
			idle_sum += stat_node->view[curcpu].idle;
		}
	}

	/* Render the file */
	/* cpu-all */
	l = snprintf(buf, buf_size, "cpu %lu 0 %lu %lu 0 0 0 0 0 0\n",
		     user_sum,
		     system_sum,
		     idle_sum);
	lxcfs_v("cpu-all: %s\n", buf);

	if (l < 0) {
		perror("Error writing to cache");
		pthread_mutex_unlock(&stat_node->lock);
		return 0;
	}
	if ((size_t)l >= buf_size) {
		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
		pthread_mutex_unlock(&stat_node->lock);
		return 0;
	}

	buf += l;
	buf_size -= l;
	total_len += l;

	/* Render visible CPUs */
	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
		if (!stat_node->usage[curcpu].online)
			continue;

		i++;

		if (max_cpus > 0 && i == max_cpus)
			break;

		l = snprintf(buf, buf_size, "cpu%d %lu 0 %lu %lu 0 0 0 0 0 0\n",
			     i,
			     stat_node->view[curcpu].user,
			     stat_node->view[curcpu].system,
			     stat_node->view[curcpu].idle);
		lxcfs_v("cpu: %s\n", buf);

		if (l < 0) {
			perror("Error writing to cache");
			pthread_mutex_unlock(&stat_node->lock);
			return 0;
		}
		if ((size_t)l >= buf_size) {
			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
			pthread_mutex_unlock(&stat_node->lock);
			return 0;
		}

		buf += l;
		buf_size -= l;
		total_len += l;
	}

	/* Pass the rest of /proc/stat, start with the last line read */
	l = snprintf(buf, buf_size, "%s", line);

	if (l < 0) {
		perror("Error writing to cache");
		pthread_mutex_unlock(&stat_node->lock);
		return 0;
	}
	if ((size_t)l >= buf_size) {
		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
		pthread_mutex_unlock(&stat_node->lock);
		return 0;
	}

	buf += l;
	buf_size -= l;
	total_len += l;

	/* Pass the rest of the host's /proc/stat */
	while (getline(&line, &linelen, f) != -1) {
		l = snprintf(buf, buf_size, "%s", line);
		if (l < 0) {
			perror("Error writing to cache");
			pthread_mutex_unlock(&stat_node->lock);
			return 0;
		}
		if ((size_t)l >= buf_size) {
			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
			pthread_mutex_unlock(&stat_node->lock);
			return 0;
		}
		buf += l;
		buf_size -= l;
		total_len += l;
	}

	/* The node lock was taken in find_or_create_proc_stat_node(). */
	pthread_mutex_unlock(&stat_node->lock);
	return total_len;
}

/*
 * Check whether this is a '^processor' line in /proc/cpuinfo.
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) == 1)
		return true;
	return false;
}

static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) != 1)
		return false;
	return cpu_in_cpuset(cpu, cpuset);
}

int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
		      struct fuse_file_info *fi)
{
	__do_free char *cg = NULL, *cpuset = NULL, *line = NULL;
	__do_fclose FILE *f = NULL;
	struct fuse_context *fc = fuse_get_context();
	struct file_info *d = (struct file_info *)fi->fh;
	size_t linelen = 0, total_len = 0;
	bool am_printing = false, firstline = true, is_s390x = false;
	int curcpu = -1, cpu, max_cpus = 0;
	bool use_view;
	char *cache = d->buf;
	size_t cache_size = d->buflen;

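	/* The file is rendered in full on the first (offset 0) read; any
	 * follow-up read with a non-zero offset is served from that cache.
	 */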
	if (offset) {
		int left;

		if (offset > d->size)
			return -EINVAL;

		if (!d->cached)
			return 0;

		left = d->size - offset;
		total_len = left > size ? size : left;
		memcpy(buf, cache + offset, total_len);

		return total_len;
	}

	pid_t initpid = lookup_initpid_in_store(fc->pid);
	if (initpid <= 1 || is_shared_pidns(initpid))
		initpid = fc->pid;
	cg = get_pid_cgroup(initpid, "cpuset");
	if (!cg)
		return read_file_fuse("proc/cpuinfo", buf, size, d);
	prune_init_slice(cg);

	cpuset = get_cpuset(cg);
	if (!cpuset)
		return 0;

	use_view = cgroup_ops->can_use_cpuview(cgroup_ops);
	if (use_view)
		max_cpus = max_cpu_count(cg);

	f = fopen("/proc/cpuinfo", "r");
	if (!f)
		return 0;

	while (getline(&line, &linelen, f) != -1) {
		ssize_t l;
		if (firstline) {
			firstline = false;
			if (strstr(line, "IBM/S390") != NULL) {
				is_s390x = true;
				am_printing = true;
				continue;
			}
		}
		if (strncmp(line, "# processors", 12) == 0)
			continue;
		if (is_processor_line(line)) {
			if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus)
				break;
			am_printing = cpuline_in_cpuset(line, cpuset);
			if (am_printing) {
				curcpu++;
				l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
				if (l < 0) {
					perror("Error writing to cache");
					return 0;
				}
				if ((size_t)l >= cache_size) {
					lxcfs_error("%s\n", "Internal error: truncated write to cache.");
					return 0;
				}
				cache += l;
				cache_size -= l;
				total_len += l;
			}
			continue;
		} else if (is_s390x && sscanf(line, "processor %d:", &cpu) == 1) {
			char *p;

			if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus)
				break;
			if (!cpu_in_cpuset(cpu, cpuset))
				continue;

			curcpu++;
			p = strchr(line, ':');
			if (!p || !*p)
				return 0;
			p++;

			l = snprintf(cache, cache_size, "processor %d:%s", curcpu, p);
			if (l < 0) {
				perror("Error writing to cache");
				return 0;
			}
			if ((size_t)l >= cache_size) {
				lxcfs_error("%s\n", "Internal error: truncated write to cache.");
				return 0;
			}
			cache += l;
			cache_size -= l;
			total_len += l;
			continue;
		}
		if (am_printing) {
			l = snprintf(cache, cache_size, "%s", line);
			if (l < 0) {
				perror("Error writing to cache");
				return 0;
			}
			if ((size_t)l >= cache_size) {
				lxcfs_error("%s\n", "Internal error: truncated write to cache.");
				return 0;
			}
			cache += l;
			cache_size -= l;
			total_len += l;
		}
	}

	if (is_s390x) {
		__do_free char *origcache = d->buf;
		ssize_t l;

		d->buf = malloc(d->buflen);
		if (!d->buf) {
			d->buf = move_ptr(origcache);
			return 0;
		}

		cache = d->buf;
		cache_size = d->buflen;
		total_len = 0;
		l = snprintf(cache, cache_size, "vendor_id : IBM/S390\n");
		if (l < 0 || (size_t)l >= cache_size)
			return 0;

		cache_size -= l;
		cache += l;
		total_len += l;
		l = snprintf(cache, cache_size, "# processors : %d\n", curcpu + 1);
		if (l < 0 || (size_t)l >= cache_size)
			return 0;

		cache_size -= l;
		cache += l;
		total_len += l;
		l = snprintf(cache, cache_size, "%s", origcache);
		if (l < 0 || (size_t)l >= cache_size)
			return 0;
		total_len += l;
	}

	d->cached = 1;
	d->size = total_len;
	if (total_len > size)
		total_len = size;

	/* read from off 0 */
	memcpy(buf, d->buf, total_len);
	return total_len;
}

/*
 * Returns 0 on success.
 * It is the caller's responsibility to free `return_usage`, unless this
 * function returns an error.
 */
int read_cpuacct_usage_all(char *cg, char *cpuset,
			   struct cpuacct_usage **return_usage, int *size)
{
	__do_free char *usage_str = NULL;
	__do_free struct cpuacct_usage *cpu_usage = NULL;
	int cpucount = get_nprocs_conf();
	int i = 0, j = 0, read_pos = 0, read_cnt = 0;
	int ret;
	int cg_cpu;
	uint64_t cg_user, cg_system;
	int64_t ticks_per_sec;

	ticks_per_sec = sysconf(_SC_CLK_TCK);

	if (ticks_per_sec < 0 && errno == EINVAL) {
		lxcfs_v(
			"%s\n",
			"read_cpuacct_usage_all failed to determine number of clock ticks "
			"in a second");
		return -1;
	}

	cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount);
	if (!cpu_usage)
		return -ENOMEM;

	memset(cpu_usage, 0, sizeof(struct cpuacct_usage) * cpucount);
	if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_all", &usage_str)) {
		char *data = NULL;
		size_t sz = 0, asz = 0;

		/* Read cpuacct.usage_percpu instead. */
		lxcfs_v("failed to read cpuacct.usage_all. reading cpuacct.usage_percpu instead\n%s", "");
		if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_percpu", &usage_str))
			return -1;
		lxcfs_v("usage_str: %s\n", usage_str);

		/* Convert cpuacct.usage_percpu into cpuacct.usage_all. */
		lxcfs_v("converting cpuacct.usage_percpu into cpuacct.usage_all\n%s", "");

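		/* cpuacct.usage_percpu is a single line of per-CPU totals,
		 * e.g. "123 456". The loop below rewrites it in the
		 * usage_all layout, charging everything to the user column:
		 *
		 *	cpu user system
		 *	0 123 0
		 *	1 456 0
		 */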
		must_strcat(&data, &sz, &asz, "cpu user system\n");

		while (sscanf(usage_str + read_pos, "%" SCNu64 " %n", &cg_user, &read_cnt) > 0) {
			lxcfs_debug("i: %d, cg_user: %" PRIu64 ", read_pos: %d, read_cnt: %d\n", i, cg_user, read_pos, read_cnt);
			must_strcat(&data, &sz, &asz, "%d %" PRIu64 " 0\n", i, cg_user);
			i++;
			read_pos += read_cnt;
		}

		/* Swap in the converted buffer (freeing the percpu string we
		 * no longer need) and restart parsing from the beginning.
		 */
		free(usage_str);
		usage_str = data;
		read_pos = 0;

		lxcfs_v("usage_str: %s\n", usage_str);
	}

	if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) {
		lxcfs_error("read_cpuacct_usage_all reading first line from "
			    "%s/cpuacct.usage_all failed.\n", cg);
		return -1;
	}

	read_pos += read_cnt;

	for (i = 0, j = 0; i < cpucount; i++) {
		ret = sscanf(usage_str + read_pos, "%d %" SCNu64 " %" SCNu64 "\n%n",
			     &cg_cpu, &cg_user, &cg_system, &read_cnt);

		if (ret == EOF)
			break;

		if (ret != 3) {
			lxcfs_error("read_cpuacct_usage_all reading from %s/cpuacct.usage_all "
				    "failed.\n", cg);
			return -1;
		}

		read_pos += read_cnt;

		/* Convert the time from nanoseconds to USER_HZ */
		cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec;
		cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec;
		j++;
	}

	*return_usage = move_ptr(cpu_usage);
	*size = cpucount;
	return 0;
}
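
/*
 * Illustrative caller pattern (assuming cg names an existing cgroup):
 *
 *	struct cpuacct_usage *usage;
 *	int cnt;
 *
 *	if (read_cpuacct_usage_all(cg, cpuset, &usage, &cnt) == 0) {
 *		// ... use usage[0..cnt-1] ...
 *		free(usage);
 *	}
 */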

static bool cpuview_init_head(struct cg_proc_stat_head **head)
{
	*head = malloc(sizeof(struct cg_proc_stat_head));
	if (!(*head)) {
		lxcfs_error("%s\n", strerror(errno));
		return false;
	}

	(*head)->lastcheck = time(NULL);
	(*head)->next = NULL;

	if (pthread_rwlock_init(&(*head)->lock, NULL) != 0) {
		lxcfs_error("%s\n", "Failed to initialize list lock");
		free_disarm(*head);
		return false;
	}

	return true;
}

bool init_cpuview(void)
{
	int i;

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++)
		proc_stat_history[i] = NULL;

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		if (!cpuview_init_head(&proc_stat_history[i]))
			goto err;
	}

	return true;

err:
	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		if (proc_stat_history[i])
			free_disarm(proc_stat_history[i]);
	}

	return false;
}

static void cpuview_free_head(struct cg_proc_stat_head *head)
{
	struct cg_proc_stat *node, *tmp;

	if (head->next) {
		node = head->next;

		for (;;) {
			tmp = node;
			node = node->next;
			free_proc_stat_node(tmp);

			if (!node)
				break;
		}
	}

	pthread_rwlock_destroy(&head->lock);
	free_disarm(head);
}

void free_cpuview(void)
{
	for (int i = 0; i < CPUVIEW_HASH_SIZE; i++)
		if (proc_stat_history[i])
			cpuview_free_head(proc_stat_history[i]);
}