/* SPDX-License-Identifier: LGPL-2.1-or-later */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#ifndef FUSE_USE_VERSION
#define FUSE_USE_VERSION 26
#endif

#define _FILE_OFFSET_BITS 64

#define __STDC_FORMAT_MACROS
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <fuse.h>
#include <inttypes.h>
#include <libgen.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <wait.h>
#include <linux/magic.h>
#include <linux/sched.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <sys/vfs.h>

#include "bindings.h"
#include "config.h"
#include "cgroup_fuse.h"
#include "cpuset_parse.h"
#include "cgroups/cgroup.h"
#include "cgroups/cgroup_utils.h"
#include "memory_utils.h"
#include "proc_loadavg.h"
#include "utils.h"

/* Data for CPU view */
struct cg_proc_stat {
	char *cg;
	struct cpuacct_usage *usage;	// Real usage as read from the host's /proc/stat
	struct cpuacct_usage *view;	// Usage stats reported to the container
	int cpu_count;
	pthread_mutex_t lock;		// For node manipulation
	struct cg_proc_stat *next;
};

struct cg_proc_stat_head {
	struct cg_proc_stat *next;
	time_t lastcheck;

	/*
	 * For access to the list. Reading can be parallel, pruning is exclusive.
	 */
	pthread_rwlock_t lock;
};

#define CPUVIEW_HASH_SIZE 100
static struct cg_proc_stat_head *proc_stat_history[CPUVIEW_HASH_SIZE];
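/*
 * Layout note with a lookup sketch (illustrative, not a new API): each
 * bucket chains cg_proc_stat nodes keyed by the cgroup path, e.g.
 *
 *	int hash = calc_hash(cg) % CPUVIEW_HASH_SIZE;
 *	struct cg_proc_stat_head *head = proc_stat_history[hash];
 *	// walk head->next under head->lock, strcmp()ing node->cg
 *
 * which is exactly what find_proc_stat_node() below does.
 */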

static void reset_proc_stat_node(struct cg_proc_stat *node, struct cpuacct_usage *usage, int cpu_count)
{
	int i;

	lxcfs_debug("Resetting stat node for %s\n", node->cg);
	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);

	for (i = 0; i < cpu_count; i++) {
		node->view[i].user = 0;
		node->view[i].system = 0;
		node->view[i].idle = 0;
	}

	node->cpu_count = cpu_count;
}

static bool expand_proc_stat_node(struct cg_proc_stat *node, int cpu_count)
{
	__do_free struct cpuacct_usage *new_usage = NULL, *new_view = NULL;

	/* Allocate new memory */
	new_usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!new_usage)
		return false;

	new_view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!new_view)
		return false;

	/* Copy existing data & initialize new elements */
	for (int i = 0; i < cpu_count; i++) {
		if (i < node->cpu_count) {
			new_usage[i].user = node->usage[i].user;
			new_usage[i].system = node->usage[i].system;
			new_usage[i].idle = node->usage[i].idle;

			new_view[i].user = node->view[i].user;
			new_view[i].system = node->view[i].system;
			new_view[i].idle = node->view[i].idle;
		} else {
			new_usage[i].user = 0;
			new_usage[i].system = 0;
			new_usage[i].idle = 0;

			new_view[i].user = 0;
			new_view[i].system = 0;
			new_view[i].idle = 0;
		}
	}

	free(node->usage);
	node->usage = move_ptr(new_usage);

	free(node->view);
	node->view = move_ptr(new_view);
	node->cpu_count = cpu_count;

	return true;
}

static void free_proc_stat_node(struct cg_proc_stat *node)
{
	pthread_mutex_destroy(&node->lock);
	free_disarm(node->cg);
	free_disarm(node->usage);
	free_disarm(node->view);
	free_disarm(node);
}

static struct cg_proc_stat *add_proc_stat_node(struct cg_proc_stat *new_node)
{
	int hash = calc_hash(new_node->cg) % CPUVIEW_HASH_SIZE;
	struct cg_proc_stat_head *head = proc_stat_history[hash];
	struct cg_proc_stat *node, *rv = new_node;

	pthread_rwlock_wrlock(&head->lock);

	if (!head->next) {
		head->next = new_node;
		goto out;
	}

	node = head->next;

	for (;;) {
		if (strcmp(node->cg, new_node->cg) == 0) {
			/* The node is already present, return it */
			free_proc_stat_node(new_node);
			rv = node;
			goto out;
		}

		if (node->next) {
			node = node->next;
			continue;
		}

		node->next = new_node;
		goto out;
	}

out:
	pthread_rwlock_unlock(&head->lock);
	return rv;
}

static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
{
	struct cg_proc_stat *node;
	int i;

	node = malloc(sizeof(struct cg_proc_stat));
	if (!node)
		goto err;

	node->cg = NULL;
	node->usage = NULL;
	node->view = NULL;

	node->cg = malloc(strlen(cg) + 1);
	if (!node->cg)
		goto err;

	strcpy(node->cg, cg);

	node->usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!node->usage)
		goto err;

	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);

	node->view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!node->view)
		goto err;

	node->cpu_count = cpu_count;
	node->next = NULL;

	if (pthread_mutex_init(&node->lock, NULL) != 0) {
		lxcfs_error("%s\n", "Failed to initialize node lock");
		goto err;
	}

	for (i = 0; i < cpu_count; i++) {
		node->view[i].user = 0;
		node->view[i].system = 0;
		node->view[i].idle = 0;
	}

	return node;

err:
	if (node && node->cg)
		free(node->cg);
	if (node && node->usage)
		free(node->usage);
	if (node && node->view)
		free(node->view);
	if (node)
		free(node);

	return NULL;
}

static bool cgfs_param_exist(const char *controller, const char *cgroup,
			     const char *file)
{
	__do_free char *path = NULL;
	int cfd;

	cfd = get_cgroup_fd(controller);
	if (cfd < 0)
		return false;

	path = must_make_path(dot_or_empty(cgroup), cgroup, file);
	return (faccessat(cfd, path, F_OK, 0) == 0);
}

static struct cg_proc_stat *prune_proc_stat_list(struct cg_proc_stat *node)
{
	struct cg_proc_stat *first = NULL, *prev, *tmp;

	for (prev = NULL; node; ) {
		if (!cgfs_param_exist("cpu", node->cg, "cpu.shares")) {
			tmp = node;
			lxcfs_debug("Removing stat node for %s\n", node->cg);

			if (prev)
				prev->next = node->next;
			else
				first = node->next;

			node = node->next;
			free_proc_stat_node(tmp);
		} else {
			if (!first)
				first = node;
			prev = node;
			node = node->next;
		}
	}

	return first;
}

#define PROC_STAT_PRUNE_INTERVAL 10
static void prune_proc_stat_history(void)
{
	int i;
	time_t now = time(NULL);

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		pthread_rwlock_wrlock(&proc_stat_history[i]->lock);

		if ((proc_stat_history[i]->lastcheck + PROC_STAT_PRUNE_INTERVAL) > now) {
			pthread_rwlock_unlock(&proc_stat_history[i]->lock);
			return;
		}

		if (proc_stat_history[i]->next) {
			proc_stat_history[i]->next = prune_proc_stat_list(proc_stat_history[i]->next);
			proc_stat_history[i]->lastcheck = now;
		}

		pthread_rwlock_unlock(&proc_stat_history[i]->lock);
	}
}

static struct cg_proc_stat *find_proc_stat_node(struct cg_proc_stat_head *head,
						const char *cg)
{
	struct cg_proc_stat *node;

	pthread_rwlock_rdlock(&head->lock);

	if (!head->next) {
		pthread_rwlock_unlock(&head->lock);
		return NULL;
	}

	node = head->next;

	do {
		if (strcmp(cg, node->cg) == 0)
			goto out;
	} while ((node = node->next));

	node = NULL;

out:
	pthread_rwlock_unlock(&head->lock);
	prune_proc_stat_history();
	return node;
}

static struct cg_proc_stat *find_or_create_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
{
	int hash = calc_hash(cg) % CPUVIEW_HASH_SIZE;
	struct cg_proc_stat_head *head = proc_stat_history[hash];
	struct cg_proc_stat *node;

	node = find_proc_stat_node(head, cg);

	if (!node) {
		node = new_proc_stat_node(usage, cpu_count, cg);
		if (!node)
			return NULL;

		node = add_proc_stat_node(node);
		lxcfs_debug("New stat node (%d) for %s\n", cpu_count, cg);
	}

	pthread_mutex_lock(&node->lock);

	/* If additional CPUs on the host have been enabled, CPU usage counter
	 * arrays have to be expanded */
	if (node->cpu_count < cpu_count) {
		lxcfs_debug("Expanding stat node %d->%d for %s\n",
			    node->cpu_count, cpu_count, cg);

		if (!expand_proc_stat_node(node, cpu_count)) {
			pthread_mutex_unlock(&node->lock);
			lxcfs_debug("Unable to expand stat node %d->%d for %s\n",
				    node->cpu_count, cpu_count, cg);
			return NULL;
		}
	}

	return node;
}
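/*
 * Usage note: on success, find_or_create_proc_stat_node() returns with
 * node->lock held; the caller must pthread_mutex_unlock() it once it has
 * finished updating the usage/view arrays, as cpuview_proc_stat() does
 * after rendering.
 */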

static void add_cpu_usage(uint64_t *surplus, struct cpuacct_usage *usage,
			  uint64_t *counter, uint64_t threshold)
{
	unsigned long free_space, to_add;

	free_space = threshold - usage->user - usage->system;

	if (free_space > usage->idle)
		free_space = usage->idle;

	to_add = free_space > *surplus ? *surplus : free_space;

	*counter += to_add;
	usage->idle -= to_add;
	*surplus -= to_add;
}
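/*
 * Worked example (illustrative numbers): with threshold = 100, a CPU at
 * user = 30, system = 20, idle = 60 has free_space = 100 - 30 - 20 = 50,
 * which is already below its idle time. If *surplus = 80, then
 * to_add = 50, so the counter gains 50 ticks, idle drops to 10, and 30
 * ticks of surplus remain to be spread over the next visible CPU.
 */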

static unsigned long diff_cpu_usage(struct cpuacct_usage *older,
				    struct cpuacct_usage *newer,
				    struct cpuacct_usage *diff, int cpu_count)
{
	int i;
	unsigned long sum = 0;

	for (i = 0; i < cpu_count; i++) {
		if (!newer[i].online)
			continue;

		/* When cpuset is changed on the fly, the CPUs might get reordered.
		 * We could either reset all counters, or check that the subtractions
		 * below will return expected results.
		 */
		if (newer[i].user > older[i].user)
			diff[i].user = newer[i].user - older[i].user;
		else
			diff[i].user = 0;

		if (newer[i].system > older[i].system)
			diff[i].system = newer[i].system - older[i].system;
		else
			diff[i].system = 0;

		if (newer[i].idle > older[i].idle)
			diff[i].idle = newer[i].idle - older[i].idle;
		else
			diff[i].idle = 0;

		sum += diff[i].user;
		sum += diff[i].system;
		sum += diff[i].idle;
	}

	return sum;
}

/*
 * Read a cgroup CPU quota parameter from `cpu.cfs_quota_us` or
 * `cpu.cfs_period_us`, depending on `param`. The parameter value is
 * returned through `value`.
 */
static bool read_cpu_cfs_param(const char *cg, const char *param, int64_t *value)
{
	__do_free char *str = NULL;
	char file[11 + 6 + 1]; /* cpu.cfs__us + quota/period + \0 */

	snprintf(file, sizeof(file), "cpu.cfs_%s_us", param);

	if (!cgroup_ops->get(cgroup_ops, "cpu", cg, file, &str))
		return false;

	if (sscanf(str, "%"PRId64, value) != 1)
		return false;

	return true;
}
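/*
 * Sketch of a typical call (hypothetical values): for a cgroup whose
 * cpu.cfs_quota_us contains "150000",
 *
 *	int64_t quota;
 *	if (read_cpu_cfs_param(cg, "quota", &quota))
 *		// quota == 150000
 *
 * reads "cpu.cfs_quota_us". A quota of -1 ("no limit") parses fine and
 * is filtered out later by the <= 0 checks in the callers.
 */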

/*
 * Return the exact number of visible CPUs based on CPU quotas.
 * If there is no quota set, zero is returned.
 */
static double exact_cpu_count(const char *cg)
{
	double rv;
	int nprocs;
	int64_t cfs_quota, cfs_period;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
		return 0;

	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	if (cfs_quota <= 0 || cfs_period <= 0)
		return 0;

	rv = (double)cfs_quota / (double)cfs_period;

	nprocs = get_nprocs();

	if (rv > nprocs)
		rv = nprocs;

	return rv;
}
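/*
 * Example: cfs_quota = 150000 and cfs_period = 100000 yield
 * 150000.0 / 100000.0 = 1.5 visible CPUs, capped at get_nprocs().
 */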

/*
 * Return the maximum number of visible CPUs based on CPU quotas.
 * If there is no quota set, zero is returned.
 */
int max_cpu_count(const char *cg)
{
	__do_free char *cpuset = NULL;
	int rv, nprocs;
	int64_t cfs_quota, cfs_period;
	int nr_cpus_in_cpuset = 0;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
		return 0;

	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	cpuset = get_cpuset(cg);
	if (cpuset)
		nr_cpus_in_cpuset = cpu_number_in_cpuset(cpuset);

	if (cfs_quota <= 0 || cfs_period <= 0) {
		if (nr_cpus_in_cpuset > 0)
			return nr_cpus_in_cpuset;

		return 0;
	}

	rv = cfs_quota / cfs_period;

	/* In case quota/period does not yield a whole number, add one CPU for
	 * the remainder.
	 */
	if ((cfs_quota % cfs_period) > 0)
		rv += 1;

	nprocs = get_nprocs();

	if (rv > nprocs)
		rv = nprocs;

	/* Use the minimum of the CPU quota and the cpuset limits. */
	if (nr_cpus_in_cpuset > 0 && nr_cpus_in_cpuset < rv)
		rv = nr_cpus_in_cpuset;

	return rv;
}
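/*
 * Example: cfs_quota = 150000, cfs_period = 100000 gives
 * 150000 / 100000 = 1 with a remainder, so rv = 2; with "0-2" in the
 * cpuset (3 CPUs) the result stays 2, the smaller of the two limits.
 */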

int cpuview_proc_stat(const char *cg, const char *cpuset,
		      struct cpuacct_usage *cg_cpu_usage, int cg_cpu_usage_size,
		      FILE *f, char *buf, size_t buf_size)
{
	__do_free char *line = NULL;
	__do_free struct cpuacct_usage *diff = NULL;
	size_t linelen = 0, total_len = 0, l;
	int curcpu = -1; /* cpu numbering starts at 0 */
	int physcpu, i;
	int max_cpus = max_cpu_count(cg), cpu_cnt = 0;
	uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0,
		 softirq = 0, steal = 0, guest = 0, guest_nice = 0;
	uint64_t user_sum = 0, system_sum = 0, idle_sum = 0;
	uint64_t user_surplus = 0, system_surplus = 0;
	uint64_t total_sum, threshold;
	struct cg_proc_stat *stat_node;
	int nprocs = get_nprocs_conf();

	if (cg_cpu_usage_size < nprocs)
		nprocs = cg_cpu_usage_size;

	/* Read all CPU stats and stop once we hit a non-cpu line */
	while (getline(&line, &linelen, f) != -1) {
		int ret;
		char cpu_char[10]; /* That's a lot of cores */
		uint64_t all_used, cg_used;

		if (strlen(line) == 0)
			continue;

		/* not a ^cpuN line containing a number N */
		if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1)
			break;

		if (sscanf(cpu_char, "%d", &physcpu) != 1)
			continue;

		if (physcpu >= cg_cpu_usage_size)
			continue;

		curcpu++;
		cpu_cnt++;

		if (!cpu_in_cpuset(physcpu, cpuset)) {
			for (i = curcpu; i <= physcpu; i++)
				cg_cpu_usage[i].online = false;
			continue;
		}

		if (curcpu < physcpu) {
			/* Some CPUs may be disabled */
			for (i = curcpu; i < physcpu; i++)
				cg_cpu_usage[i].online = false;

			curcpu = physcpu;
		}

		cg_cpu_usage[curcpu].online = true;

		ret = sscanf(line, "%*s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64,
			     &user,
			     &nice,
			     &system,
			     &idle,
			     &iowait,
			     &irq,
			     &softirq,
			     &steal,
			     &guest,
			     &guest_nice);

		if (ret != 10)
			continue;

		all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
		cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system;

		if (all_used >= cg_used) {
			cg_cpu_usage[curcpu].idle = idle + (all_used - cg_used);

		} else {
			lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time",
				    curcpu, cg, all_used, cg_used);
			cg_cpu_usage[curcpu].idle = idle;
		}
	}

	/* Cannot use more CPUs than is available due to cpuset */
	if (max_cpus > cpu_cnt)
		max_cpus = cpu_cnt;

	stat_node = find_or_create_proc_stat_node(cg_cpu_usage, nprocs, cg);
	if (!stat_node) {
		lxcfs_error("unable to find/create stat node for %s\n", cg);
		return 0;
	}

	diff = malloc(sizeof(struct cpuacct_usage) * nprocs);
	if (!diff)
		goto out; /* total_len is still 0; unlock happens at out */

	/*
	 * If the new values are LOWER than values stored in memory, it means
	 * the cgroup has been reset/recreated and we should reset too.
	 */
	for (curcpu = 0; curcpu < nprocs; curcpu++) {
		if (!cg_cpu_usage[curcpu].online)
			continue;

		if (cg_cpu_usage[curcpu].user < stat_node->usage[curcpu].user)
			reset_proc_stat_node(stat_node, cg_cpu_usage, nprocs);

		break;
	}

	total_sum = diff_cpu_usage(stat_node->usage, cg_cpu_usage, diff, nprocs);

	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
		stat_node->usage[curcpu].online = cg_cpu_usage[curcpu].online;

		if (!stat_node->usage[curcpu].online)
			continue;

		i++;

		stat_node->usage[curcpu].user += diff[curcpu].user;
		stat_node->usage[curcpu].system += diff[curcpu].system;
		stat_node->usage[curcpu].idle += diff[curcpu].idle;

		if (max_cpus > 0 && i >= max_cpus) {
			user_surplus += diff[curcpu].user;
			system_surplus += diff[curcpu].system;
		}
	}

	/* Calculate usage counters of visible CPUs */
	if (max_cpus > 0) {
		uint64_t diff_user = 0;
		uint64_t diff_system = 0;
		uint64_t diff_idle = 0;
		uint64_t max_diff_idle = 0;
		uint64_t max_diff_idle_index = 0;
		double exact_cpus;

		/* threshold = maximum usage per cpu, including idle */
		threshold = total_sum / cpu_cnt * max_cpus;
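		/*
		 * Example with small numbers: if total_sum = 400 ticks over
		 * cpu_cnt = 4 host CPUs and max_cpus = 2, then threshold =
		 * 400 / 4 * 2 = 200 ticks, the most user + system time one
		 * visible CPU may accumulate this interval before the rest
		 * is carried as surplus to other visible CPUs.
		 */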

		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			i++;

			if (i == max_cpus)
				break;

			if (diff[curcpu].user + diff[curcpu].system >= threshold)
				continue;

			/* Add user */
			add_cpu_usage(&user_surplus, &diff[curcpu],
				      &diff[curcpu].user, threshold);

			if (diff[curcpu].user + diff[curcpu].system >= threshold)
				continue;

			/* If there is still room, add system */
			add_cpu_usage(&system_surplus, &diff[curcpu],
				      &diff[curcpu].system, threshold);
		}

		if (user_surplus > 0)
			lxcfs_debug("leftover user: %lu for %s\n", user_surplus, cg);
		if (system_surplus > 0)
			lxcfs_debug("leftover system: %lu for %s\n", system_surplus, cg);

		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			i++;

			if (i == max_cpus)
				break;

			stat_node->view[curcpu].user += diff[curcpu].user;
			stat_node->view[curcpu].system += diff[curcpu].system;
			stat_node->view[curcpu].idle += diff[curcpu].idle;

			user_sum += stat_node->view[curcpu].user;
			system_sum += stat_node->view[curcpu].system;
			idle_sum += stat_node->view[curcpu].idle;

			diff_user += diff[curcpu].user;
			diff_system += diff[curcpu].system;
			diff_idle += diff[curcpu].idle;
			if (diff[curcpu].idle > max_diff_idle) {
				max_diff_idle = diff[curcpu].idle;
				max_diff_idle_index = curcpu;
			}

			lxcfs_v("curcpu: %d, diff_user: %lu, diff_system: %lu, diff_idle: %lu\n", curcpu, diff[curcpu].user, diff[curcpu].system, diff[curcpu].idle);
		}
		lxcfs_v("total. diff_user: %lu, diff_system: %lu, diff_idle: %lu\n", diff_user, diff_system, diff_idle);

		/* Revise the cpu usage view to support the partial-CPU case. */
		exact_cpus = exact_cpu_count(cg);
		if (exact_cpus < (double)max_cpus) {
			unsigned long delta = (unsigned long)((double)(diff_user + diff_system + diff_idle) * (1 - exact_cpus / (double)max_cpus));

			lxcfs_v("revising cpu usage view to match the exact cpu count [%f]\n", exact_cpus);
			lxcfs_v("delta: %lu\n", delta);
			lxcfs_v("idle_sum before: %lu\n", idle_sum);
			idle_sum = idle_sum > delta ? idle_sum - delta : 0;
			lxcfs_v("idle_sum after: %lu\n", idle_sum);

			curcpu = max_diff_idle_index;
			lxcfs_v("curcpu: %d, idle before: %lu\n", curcpu, stat_node->view[curcpu].idle);
			stat_node->view[curcpu].idle = stat_node->view[curcpu].idle > delta ? stat_node->view[curcpu].idle - delta : 0;
			lxcfs_v("curcpu: %d, idle after: %lu\n", curcpu, stat_node->view[curcpu].idle);
		}
	} else {
		for (curcpu = 0; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			stat_node->view[curcpu].user = stat_node->usage[curcpu].user;
			stat_node->view[curcpu].system = stat_node->usage[curcpu].system;
			stat_node->view[curcpu].idle = stat_node->usage[curcpu].idle;

			user_sum += stat_node->view[curcpu].user;
			system_sum += stat_node->view[curcpu].system;
			idle_sum += stat_node->view[curcpu].idle;
		}
	}

	/* Render the file */
	/* cpu-all */
	l = snprintf(buf, buf_size,
		     "cpu %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
		     user_sum, system_sum, idle_sum);
	lxcfs_v("cpu-all: %s\n", buf);

	if (l < 0) {
		perror("Error writing to cache");
		total_len = 0;
		goto out;
	}
	if (l >= buf_size) {
		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
		total_len = 0;
		goto out;
	}

	buf += l;
	buf_size -= l;
	total_len += l;

	/* Render visible CPUs */
	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
		if (!stat_node->usage[curcpu].online)
			continue;

		i++;

		if (max_cpus > 0 && i == max_cpus)
			break;

		l = snprintf(buf, buf_size, "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
			     i,
			     stat_node->view[curcpu].user,
			     stat_node->view[curcpu].system,
			     stat_node->view[curcpu].idle);
		lxcfs_v("cpu: %s\n", buf);

		if (l < 0) {
			perror("Error writing to cache");
			total_len = 0;
			goto out;
		}
		if (l >= buf_size) {
			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
			total_len = 0;
			goto out;
		}

		buf += l;
		buf_size -= l;
		total_len += l;
	}

	/* Pass the rest of /proc/stat, start with the last line read */
	l = snprintf(buf, buf_size, "%s", line);

	if (l < 0) {
		perror("Error writing to cache");
		total_len = 0;
		goto out;
	}
	if (l >= buf_size) {
		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
		total_len = 0;
		goto out;
	}

	buf += l;
	buf_size -= l;
	total_len += l;

	/* Pass the rest of the host's /proc/stat */
	while (getline(&line, &linelen, f) != -1) {
		l = snprintf(buf, buf_size, "%s", line);
		if (l < 0) {
			perror("Error writing to cache");
			total_len = 0;
			goto out;
		}
		if (l >= buf_size) {
			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
			total_len = 0;
			goto out;
		}
		buf += l;
		buf_size -= l;
		total_len += l;
	}

out:
	pthread_mutex_unlock(&stat_node->lock);
	return total_len;
}
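/*
 * Example of the rendered view (illustrative numbers): a container
 * limited to two visible CPUs sees something like
 *
 *	cpu 360 0 213 3986 0 0 0 0 0 0
 *	cpu0 180 0 102 1993 0 0 0 0 0 0
 *	cpu1 180 0 111 1993 0 0 0 0 0 0
 *
 * followed by the host's remaining /proc/stat lines (intr, ctxt, btime,
 * processes, ...) passed through unmodified.
 */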

/*
 * Check whether this is a "^processor" line in /proc/cpuinfo.
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) == 1)
		return true;
	return false;
}

static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) != 1)
		return false;
	return cpu_in_cpuset(cpu, cpuset);
}
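/*
 * Example input (typical /proc/cpuinfo on x86_64):
 *
 *	processor	: 0
 *	vendor_id	: GenuineIntel
 *	...
 *
 * The " : " in the sscanf format still matches the tab-padded separator
 * because a space in a scanf format consumes any run of whitespace.
 */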

int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
		      struct fuse_file_info *fi)
{
	__do_free char *cg = NULL, *cpuset = NULL, *line = NULL;
	__do_free void *fopen_cache = NULL;
	__do_fclose FILE *f = NULL;
	struct fuse_context *fc = fuse_get_context();
	struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data;
	struct file_info *d = INTTYPE_TO_PTR(fi->fh);
	size_t linelen = 0, total_len = 0;
	bool am_printing = false, firstline = true, is_s390x = false;
	int curcpu = -1, cpu, max_cpus = 0;
	bool use_view = false;
	char *cache = d->buf;
	size_t cache_size = d->buflen;

	if (offset) {
		int left;

		if (offset > d->size)
			return -EINVAL;

		if (!d->cached)
			return 0;

		left = d->size - offset;
		total_len = left > size ? size : left;
		memcpy(buf, cache + offset, total_len);

		return total_len;
	}

	pid_t initpid = lookup_initpid_in_store(fc->pid);
	if (initpid <= 1 || is_shared_pidns(initpid))
		initpid = fc->pid;
	cg = get_pid_cgroup(initpid, "cpuset");
	if (!cg)
		return read_file_fuse("proc/cpuinfo", buf, size, d);
	prune_init_slice(cg);

	cpuset = get_cpuset(cg);
	if (!cpuset)
		return 0;

	if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts->use_cfs)
		use_view = true;

	if (use_view)
		max_cpus = max_cpu_count(cg);

	f = fopen_cached("/proc/cpuinfo", "re", &fopen_cache);
	if (!f)
		return 0;

	while (getline(&line, &linelen, f) != -1) {
		ssize_t l;

		if (firstline) {
			firstline = false;
			if (strstr(line, "IBM/S390") != NULL) {
				is_s390x = true;
				am_printing = true;
				continue;
			}
		}

		if (strncmp(line, "# processors:", 12) == 0)
			continue;

		if (is_processor_line(line)) {
			if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus)
				break;

			am_printing = cpuline_in_cpuset(line, cpuset);
			if (am_printing) {
				curcpu++;
				l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
				if (l < 0) {
					perror("Error writing to cache");
					return 0;
				}
				if (l >= cache_size) {
					lxcfs_error("%s\n", "Internal error: truncated write to cache.");
					return 0;
				}
				cache += l;
				cache_size -= l;
				total_len += l;
			}
			continue;
		} else if (is_s390x && sscanf(line, "processor %d:", &cpu) == 1) {
			char *p;

			if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus)
				break;

			if (!cpu_in_cpuset(cpu, cpuset))
				continue;

			curcpu++;
			p = strchr(line, ':');
			if (!p || !*p)
				return 0;
			p++;

			l = snprintf(cache, cache_size, "processor %d:%s", curcpu, p);
			if (l < 0) {
				perror("Error writing to cache");
				return 0;
			}
			if (l >= cache_size) {
				lxcfs_error("%s\n", "Internal error: truncated write to cache.");
				return 0;
			}
			cache += l;
			cache_size -= l;
			total_len += l;
			continue;
		}

		if (am_printing) {
			l = snprintf(cache, cache_size, "%s", line);
			if (l < 0) {
				perror("Error writing to cache");
				return 0;
			}
			if (l >= cache_size) {
				lxcfs_error("%s\n", "Internal error: truncated write to cache.");
				return 0;
			}
			cache += l;
			cache_size -= l;
			total_len += l;
		}
	}

	if (is_s390x) {
		__do_free char *origcache = d->buf;
		ssize_t l;

		d->buf = malloc(d->buflen);
		if (!d->buf) {
			d->buf = move_ptr(origcache);
			return 0;
		}

		cache = d->buf;
		cache_size = d->buflen;
		total_len = 0;
		l = snprintf(cache, cache_size, "vendor_id : IBM/S390\n");
		if (l < 0 || l >= cache_size)
			return 0;

		cache_size -= l;
		cache += l;
		total_len += l;
		l = snprintf(cache, cache_size, "# processors : %d\n", curcpu + 1);
		if (l < 0 || l >= cache_size)
			return 0;

		cache_size -= l;
		cache += l;
		total_len += l;
		l = snprintf(cache, cache_size, "%s", origcache);
		if (l < 0 || l >= cache_size)
			return 0;
		total_len += l;
	}

	d->cached = 1;
	d->size = total_len;
	if (total_len > size)
		total_len = size;

	/* read from off 0 */
	memcpy(buf, d->buf, total_len);
	return total_len;
}
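/*
 * Example: with max_cpus = 2 on an 8-core host, the container reads only
 * the first two cpuset-member "processor" stanzas, renumbered 0 and 1,
 * so the visible processor IDs never betray which physical CPUs back them.
 */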

/*
 * Returns 0 on success.
 * It is the caller's responsibility to free `return_usage`, unless this
 * function returns an error.
 */
int read_cpuacct_usage_all(char *cg, char *cpuset,
			   struct cpuacct_usage **return_usage, int *size)
{
	__do_free char *usage_str = NULL;
	__do_free struct cpuacct_usage *cpu_usage = NULL;
	int cpucount = get_nprocs_conf();
	int i = 0, j = 0, read_pos = 0, read_cnt = 0;
	int ret;
	int cg_cpu;
	uint64_t cg_user, cg_system;
	int64_t ticks_per_sec;

	ticks_per_sec = sysconf(_SC_CLK_TCK);

	if (ticks_per_sec < 0 && errno == EINVAL) {
		lxcfs_v(
			"%s\n",
			"read_cpuacct_usage_all failed to determine number of clock ticks "
			"in a second");
		return -1;
	}

	cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount);
	if (!cpu_usage)
		return -ENOMEM;

	memset(cpu_usage, 0, sizeof(struct cpuacct_usage) * cpucount);
	if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_all", &usage_str)) {
		char *data = NULL;
		size_t sz = 0, asz = 0;

		/* Read cpuacct.usage_percpu instead. */
		lxcfs_v("failed to read cpuacct.usage_all. reading cpuacct.usage_percpu instead\n%s", "");
		if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_percpu", &usage_str))
			return -1;
		lxcfs_v("usage_str: %s\n", usage_str);

		/* Convert cpuacct.usage_percpu into cpuacct.usage_all. */
		lxcfs_v("converting cpuacct.usage_percpu into cpuacct.usage_all\n%s", "");

		must_strcat(&data, &sz, &asz, "cpu user system\n");

		while (sscanf(usage_str + read_pos, "%" PRIu64 " %n", &cg_user, &read_cnt) > 0) {
			lxcfs_debug("i: %d, cg_user: %" PRIu64 ", read_pos: %d, read_cnt: %d\n", i, cg_user, read_pos, read_cnt);
			must_strcat(&data, &sz, &asz, "%d %" PRIu64 " 0\n", i, cg_user);
			i++;
			read_pos += read_cnt;
		}

		free_disarm(usage_str);
		usage_str = data;

		lxcfs_v("usage_str: %s\n", usage_str);
	}

	if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) {
		lxcfs_error("read_cpuacct_usage_all reading first line from "
			    "%s/cpuacct.usage_all failed.\n", cg);
		return -1;
	}

	/* Start right after the header; read_pos may be non-zero if we took
	 * the usage_percpu fallback above.
	 */
	read_pos = read_cnt;

	for (i = 0, j = 0; i < cpucount; i++) {
		ret = sscanf(usage_str + read_pos,
			     "%d %" PRIu64 " %" PRIu64 "\n%n", &cg_cpu,
			     &cg_user, &cg_system, &read_cnt);

		if (ret == EOF)
			break;

		if (ret != 3) {
			lxcfs_error("read_cpuacct_usage_all reading from %s/cpuacct.usage_all "
				    "failed.\n", cg);
			return -1;
		}

		read_pos += read_cnt;

		/* Convert the time from nanoseconds to USER_HZ */
		cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec;
		cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec;
		j++;
	}

	*return_usage = move_ptr(cpu_usage);
	*size = cpucount;
	return 0;
}
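/*
 * Example cpuacct.usage_all content (values in nanoseconds):
 *
 *	cpu user system
 *	0 173689345096 65246968704
 *	1 166651856980 62566213920
 *
 * With _SC_CLK_TCK = 100, a user time of 173689345096 ns converts to
 * 173689345096 / 1e9 * 100, roughly 17368 ticks.
 */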

static bool cpuview_init_head(struct cg_proc_stat_head **head)
{
	*head = malloc(sizeof(struct cg_proc_stat_head));
	if (!(*head)) {
		lxcfs_error("%s\n", strerror(errno));
		return false;
	}

	(*head)->lastcheck = time(NULL);
	(*head)->next = NULL;

	if (pthread_rwlock_init(&(*head)->lock, NULL) != 0) {
		lxcfs_error("%s\n", "Failed to initialize list lock");
		free_disarm(*head);
		return false;
	}

	return true;
}

bool init_cpuview(void)
{
	int i;

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++)
		proc_stat_history[i] = NULL;

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		if (!cpuview_init_head(&proc_stat_history[i]))
			goto err;
	}

	return true;

err:
	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		if (proc_stat_history[i])
			free_disarm(proc_stat_history[i]);
	}

	return false;
}

static void cpuview_free_head(struct cg_proc_stat_head *head)
{
	struct cg_proc_stat *node, *tmp;

	if (head->next) {
		node = head->next;

		for (;;) {
			tmp = node;
			node = node->next;
			free_proc_stat_node(tmp);

			if (!node)
				break;
		}
	}

	pthread_rwlock_destroy(&head->lock);
	free_disarm(head);
}

void free_cpuview(void)
{
	for (int i = 0; i < CPUVIEW_HASH_SIZE; i++)
		if (proc_stat_history[i])
			cpuview_free_head(proc_stat_history[i]);
}