/* proc_cpuview.c - CPU view handling for lxcfs (git.proxmox.com, mirror_lxcfs.git) */
/* SPDX-License-Identifier: LGPL-2.1-or-later */

#ifndef FUSE_USE_VERSION
#define FUSE_USE_VERSION 26
#endif

#define _FILE_OFFSET_BITS 64

#define __STDC_FORMAT_MACROS

#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <linux/magic.h>
#include <linux/sched.h>
#include <sys/epoll.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>

#include "cgroup_fuse.h"
#include "cpuset_parse.h"
#include "cgroups/cgroup.h"
#include "cgroups/cgroup_utils.h"
#include "memory_utils.h"
#include "proc_loadavg.h"
/* Data for CPU view */
struct cg_proc_stat {
	char *cg;
	struct cpuacct_usage *usage;	// Real usage as read from the host's /proc/stat
	struct cpuacct_usage *view;	// Usage stats reported to the container
	int cpu_count;
	pthread_mutex_t lock;		// For node manipulation
	struct cg_proc_stat *next;
};
struct cg_proc_stat_head {
	struct cg_proc_stat *next;
	time_t lastcheck;

	/*
	 * For access to the list. Reading can be parallel, pruning is exclusive.
	 */
	pthread_rwlock_t lock;
};
#define CPUVIEW_HASH_SIZE 100
static struct cg_proc_stat_head *proc_stat_history[CPUVIEW_HASH_SIZE];
static void reset_proc_stat_node(struct cg_proc_stat *node,
				 struct cpuacct_usage *usage, int cpu_count)
{
	lxcfs_debug("Resetting stat node for %s\n", node->cg);

	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);

	for (int i = 0; i < cpu_count; i++) {
		node->view[i].user = 0;
		node->view[i].system = 0;
		node->view[i].idle = 0;
	}

	node->cpu_count = cpu_count;
}
static bool expand_proc_stat_node(struct cg_proc_stat *node, int cpu_count)
{
	__do_free struct cpuacct_usage *new_usage = NULL, *new_view = NULL;

	/* Allocate new memory */
	new_usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!new_usage)
		return false;

	new_view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!new_view)
		return false;

	/* Copy existing data & initialize new elements */
	for (int i = 0; i < cpu_count; i++) {
		if (i < node->cpu_count) {
			new_usage[i].user = node->usage[i].user;
			new_usage[i].system = node->usage[i].system;
			new_usage[i].idle = node->usage[i].idle;

			new_view[i].user = node->view[i].user;
			new_view[i].system = node->view[i].system;
			new_view[i].idle = node->view[i].idle;
		} else {
			new_usage[i].user = 0;
			new_usage[i].system = 0;
			new_usage[i].idle = 0;

			new_view[i].user = 0;
			new_view[i].system = 0;
			new_view[i].idle = 0;
		}
	}

	free(node->usage);
	node->usage = move_ptr(new_usage);

	free(node->view);
	node->view = move_ptr(new_view);
	node->cpu_count = cpu_count;

	return true;
}
static void free_proc_stat_node(struct cg_proc_stat *node)
{
	pthread_mutex_destroy(&node->lock);
	free_disarm(node->cg);
	free_disarm(node->usage);
	free_disarm(node->view);
	free_disarm(node);
}
static struct cg_proc_stat *add_proc_stat_node(struct cg_proc_stat *new_node)
{
	int hash = calc_hash(new_node->cg) % CPUVIEW_HASH_SIZE;
	struct cg_proc_stat_head *head = proc_stat_history[hash];
	struct cg_proc_stat *node, *rv = new_node;

	pthread_rwlock_wrlock(&head->lock);

	if (!head->next) {
		head->next = new_node;
		goto out;
	}

	node = head->next;

	for (;;) {
		if (strcmp(node->cg, new_node->cg) == 0) {
			/* The node is already present, return it */
			free_proc_stat_node(new_node);
			rv = node;
			goto out;
		}

		if (node->next) {
			node = node->next;
			continue;
		}

		node->next = new_node;
		goto out;
	}

out:
	pthread_rwlock_unlock(&head->lock);
	return rv;
}
static struct cg_proc_stat *new_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
{
	struct cg_proc_stat *node;
	int i;

	node = malloc(sizeof(struct cg_proc_stat));
	if (!node)
		goto err;

	node->cg = NULL;
	node->usage = NULL;
	node->view = NULL;

	node->cg = malloc(strlen(cg) + 1);
	if (!node->cg)
		goto err;

	strcpy(node->cg, cg);

	node->usage = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!node->usage)
		goto err;

	memcpy(node->usage, usage, sizeof(struct cpuacct_usage) * cpu_count);

	node->view = malloc(sizeof(struct cpuacct_usage) * cpu_count);
	if (!node->view)
		goto err;

	node->cpu_count = cpu_count;
	node->next = NULL;

	if (pthread_mutex_init(&node->lock, NULL) != 0) {
		lxcfs_error("%s\n", "Failed to initialize node lock");
		goto err;
	}

	for (i = 0; i < cpu_count; i++) {
		node->view[i].user = 0;
		node->view[i].system = 0;
		node->view[i].idle = 0;
	}

	return node;

err:
	if (node && node->cg)
		free(node->cg);
	if (node && node->usage)
		free(node->usage);
	if (node && node->view)
		free(node->view);
	free(node);

	return NULL;
}
static bool cgfs_param_exist(const char *controller, const char *cgroup,
			     const char *file)
{
	__do_free char *path = NULL;
	int cfd;

	cfd = get_cgroup_fd(controller);
	if (cfd < 0)
		return false;

	path = must_make_path(dot_or_empty(cgroup), cgroup, file);
	return (faccessat(cfd, path, F_OK, 0) == 0);
}
static struct cg_proc_stat *prune_proc_stat_list(struct cg_proc_stat *node)
{
	struct cg_proc_stat *first = NULL, *prev, *tmp;

	for (prev = NULL; node; ) {
		if (!cgfs_param_exist("cpu", node->cg, "cpu.shares")) {
			tmp = node;
			lxcfs_debug("Removing stat node for %s\n", node->cg);

			if (prev)
				prev->next = node->next;
			else
				first = node->next;

			node = node->next;
			free_proc_stat_node(tmp);
		} else {
			if (!first)
				first = node;
			prev = node;
			node = node->next;
		}
	}

	return first;
}
#define PROC_STAT_PRUNE_INTERVAL 10
static void prune_proc_stat_history(void)
{
	int i;
	time_t now = time(NULL);

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		pthread_rwlock_wrlock(&proc_stat_history[i]->lock);

		if ((proc_stat_history[i]->lastcheck + PROC_STAT_PRUNE_INTERVAL) > now) {
			pthread_rwlock_unlock(&proc_stat_history[i]->lock);
			return;
		}

		if (proc_stat_history[i]->next) {
			proc_stat_history[i]->next = prune_proc_stat_list(proc_stat_history[i]->next);
			proc_stat_history[i]->lastcheck = now;
		}

		pthread_rwlock_unlock(&proc_stat_history[i]->lock);
	}
}
static struct cg_proc_stat *find_proc_stat_node(struct cg_proc_stat_head *head,
						const char *cg)
{
	struct cg_proc_stat *node;

	pthread_rwlock_rdlock(&head->lock);

	if (!head->next) {
		pthread_rwlock_unlock(&head->lock);
		return NULL;
	}

	node = head->next;

	do {
		if (strcmp(cg, node->cg) == 0)
			goto out;
	} while ((node = node->next));

	node = NULL;

out:
	pthread_rwlock_unlock(&head->lock);
	prune_proc_stat_history();
	return node;
}
static struct cg_proc_stat *find_or_create_proc_stat_node(struct cpuacct_usage *usage, int cpu_count, const char *cg)
{
	int hash = calc_hash(cg) % CPUVIEW_HASH_SIZE;
	struct cg_proc_stat_head *head = proc_stat_history[hash];
	struct cg_proc_stat *node;

	node = find_proc_stat_node(head, cg);
	if (!node) {
		node = new_proc_stat_node(usage, cpu_count, cg);
		if (!node)
			return NULL;

		node = add_proc_stat_node(node);
		lxcfs_debug("New stat node (%d) for %s\n", cpu_count, cg);
	}

	pthread_mutex_lock(&node->lock);

	/* If additional CPUs on the host have been enabled, CPU usage counter
	 * arrays have to be expanded */
	if (node->cpu_count < cpu_count) {
		lxcfs_debug("Expanding stat node %d->%d for %s\n",
			    node->cpu_count, cpu_count, cg);

		if (!expand_proc_stat_node(node, cpu_count)) {
			pthread_mutex_unlock(&node->lock);
			lxcfs_debug("Unable to expand stat node %d->%d for %s\n",
				    node->cpu_count, cpu_count, cg);
			return NULL;
		}
	}

	return node;
}
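/*
 * Illustrative usage sketch, not part of the original file: the node returned
 * above still has node->lock held, so a caller is expected to pair the lookup
 * with an unlock once it has finished updating the per-CPU counters, as
 * cpuview_proc_stat() does further down:
 *
 *	struct cg_proc_stat *node = find_or_create_proc_stat_node(usage, nprocs, cg);
 *	if (node) {
 *		... update node->usage / node->view ...
 *		pthread_mutex_unlock(&node->lock);
 *	}
 */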
static void add_cpu_usage(uint64_t *surplus, struct cpuacct_usage *usage,
			  uint64_t *counter, uint64_t threshold)
{
	unsigned long free_space, to_add;

	free_space = threshold - usage->user - usage->system;

	if (free_space > usage->idle)
		free_space = usage->idle;

	to_add = free_space > *surplus ? *surplus : free_space;

	*counter += to_add;
	usage->idle -= to_add;
	*surplus -= to_add;
}
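/*
 * Worked example (illustrative, not from the original source): with
 * threshold = 500, usage->user = 200, usage->system = 100 and
 * usage->idle = 150, free_space starts at 500 - 200 - 100 = 200 but is capped
 * at the 150 idle ticks; if *surplus is 400, then to_add = 150, so 150 ticks
 * move from usage->idle into *counter and *surplus drops to 250.
 */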
static unsigned long diff_cpu_usage(struct cpuacct_usage *older,
				    struct cpuacct_usage *newer,
				    struct cpuacct_usage *diff, int cpu_count)
{
	int i;
	unsigned long sum = 0;

	for (i = 0; i < cpu_count; i++) {
		if (!newer[i].online)
			continue;

		/*
		 * When cpuset is changed on the fly, the CPUs might get
		 * reordered. We could either reset all counters, or check
		 * that the subtractions below will return expected results.
		 */
		if (newer[i].user > older[i].user)
			diff[i].user = newer[i].user - older[i].user;
		else
			diff[i].user = 0;

		if (newer[i].system > older[i].system)
			diff[i].system = newer[i].system - older[i].system;
		else
			diff[i].system = 0;

		if (newer[i].idle > older[i].idle)
			diff[i].idle = newer[i].idle - older[i].idle;
		else
			diff[i].idle = 0;

		sum += diff[i].user;
		sum += diff[i].system;
		sum += diff[i].idle;
	}

	return sum;
}
/*
 * Read cgroup CPU quota parameters from `cpu.cfs_quota_us` or
 * `cpu.cfs_period_us`, depending on `param`. The parameter value is returned
 * through `value`.
 */
static bool read_cpu_cfs_param(const char *cg, const char *param, int64_t *value)
{
	__do_free char *str = NULL;
	char file[11 + 6 + 1]; /* cpu.cfs__us + quota/period + \0 */

	snprintf(file, sizeof(file), "cpu.cfs_%s_us", param);

	if (!cgroup_ops->get(cgroup_ops, "cpu", cg, file, &str))
		return false;

	if (sscanf(str, "%"PRId64, value) != 1)
		return false;

	return true;
}
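/*
 * Example (illustrative): param "quota" expands to "cpu.cfs_quota_us" and
 * "period" to "cpu.cfs_period_us". The buffer size above also checks out:
 * "cpu.cfs__us" is 11 bytes, the longest parameter name "period" adds 6,
 * plus 1 for the terminating NUL gives 18, exactly enough to hold
 * "cpu.cfs_period_us".
 */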
/*
 * Return the exact number of visible CPUs based on CPU quotas.
 * If there is no quota set, zero is returned.
 */
static double exact_cpu_count(const char *cg)
{
	double rv;
	int nprocs;
	int64_t cfs_quota, cfs_period;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
		return 0;

	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	if (cfs_quota <= 0 || cfs_period <= 0)
		return 0;

	rv = (double)cfs_quota / (double)cfs_period;

	nprocs = get_nprocs();
	if (rv > nprocs)
		rv = nprocs;

	return rv;
}
/*
 * Return the maximum number of visible CPUs based on CPU quotas.
 * If there is no quota set, zero is returned.
 */
int max_cpu_count(const char *cg)
{
	__do_free char *cpuset = NULL;
	int rv, nprocs;
	int64_t cfs_quota, cfs_period;
	int nr_cpus_in_cpuset = 0;

	if (!read_cpu_cfs_param(cg, "quota", &cfs_quota))
		return 0;

	if (!read_cpu_cfs_param(cg, "period", &cfs_period))
		return 0;

	cpuset = get_cpuset(cg);
	if (cpuset)
		nr_cpus_in_cpuset = cpu_number_in_cpuset(cpuset);

	if (cfs_quota <= 0 || cfs_period <= 0) {
		if (nr_cpus_in_cpuset > 0)
			return nr_cpus_in_cpuset;

		return 0;
	}

	rv = cfs_quota / cfs_period;

	/* In case quota/period does not yield a whole number, add one CPU for
	 * the remainder.
	 */
	if ((cfs_quota % cfs_period) > 0)
		rv += 1;

	nprocs = get_nprocs();
	if (rv > nprocs)
		rv = nprocs;

	/* use min value in cpu quota and cpuset */
	if (nr_cpus_in_cpuset > 0 && nr_cpus_in_cpuset < rv)
		rv = nr_cpus_in_cpuset;

	return rv;
}
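/*
 * Worked example (illustrative): with cpu.cfs_quota_us = 150000 and
 * cpu.cfs_period_us = 100000, exact_cpu_count() returns 1.5 while
 * max_cpu_count() rounds the quotient up to 2; if the cgroup's cpuset allows
 * only one CPU, the cpuset minimum wins and 1 is returned instead.
 */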
int cpuview_proc_stat(const char *cg, const char *cpuset,
		      struct cpuacct_usage *cg_cpu_usage, int cg_cpu_usage_size,
		      FILE *f, char *buf, size_t buf_size)
{
	__do_free char *line = NULL;
	__do_free struct cpuacct_usage *diff = NULL;
	size_t linelen = 0, total_len = 0, l;
	int curcpu = -1; /* cpu numbering starts at 0 */
	int physcpu, i;
	int max_cpus = max_cpu_count(cg), cpu_cnt = 0;
	uint64_t user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0,
		 softirq = 0, steal = 0, guest = 0, guest_nice = 0;
	uint64_t user_sum = 0, system_sum = 0, idle_sum = 0;
	uint64_t user_surplus = 0, system_surplus = 0;
	uint64_t total_sum, threshold;
	struct cg_proc_stat *stat_node;
	int nprocs = get_nprocs_conf();

	if (cg_cpu_usage_size < nprocs)
		nprocs = cg_cpu_usage_size;
	/* Read all CPU stats and stop when we've encountered other lines */
	while (getline(&line, &linelen, f) != -1) {
		int ret;
		char cpu_char[10]; /* That's a lot of cores */
		uint64_t all_used, cg_used;

		if (strlen(line) == 0)
			continue;

		/* not a ^cpuN line containing a number N */
		if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1)
			break;

		if (sscanf(cpu_char, "%d", &physcpu) != 1)
			continue;

		if (physcpu >= cg_cpu_usage_size)
			continue;

		curcpu++;
		cpu_cnt++;

		if (!cpu_in_cpuset(physcpu, cpuset)) {
			for (i = curcpu; i <= physcpu; i++)
				cg_cpu_usage[i].online = false;
			continue;
		}

		if (curcpu < physcpu) {
			/* Some CPUs may be disabled */
			for (i = curcpu; i < physcpu; i++)
				cg_cpu_usage[i].online = false;
			curcpu = physcpu;
		}

		cg_cpu_usage[curcpu].online = true;

		ret = sscanf(line, "%*s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64,
			     &user, &nice, &system, &idle, &iowait, &irq,
			     &softirq, &steal, &guest, &guest_nice);
		if (ret != 10)
			continue;

		all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
		cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system;

		if (all_used >= cg_used) {
			cg_cpu_usage[curcpu].idle = idle + (all_used - cg_used);
		} else {
			lxcfs_error("cpu%d from %s has unexpected cpu time: %" PRIu64 " in /proc/stat, %" PRIu64 " in cpuacct.usage_all; unable to determine idle time",
				    curcpu, cg, all_used, cg_used);
			cg_cpu_usage[curcpu].idle = idle;
		}
	}
	/* Cannot use more CPUs than is available due to cpuset */
	if (max_cpus > cpu_cnt)
		max_cpus = cpu_cnt;

	stat_node = find_or_create_proc_stat_node(cg_cpu_usage, nprocs, cg);
	if (!stat_node) {
		lxcfs_error("unable to find/create stat node for %s\n", cg);
		return 0;
	}

	diff = malloc(sizeof(struct cpuacct_usage) * nprocs);
	if (!diff)
		goto out;

	/*
	 * If the new values are LOWER than values stored in memory, it means
	 * the cgroup has been reset/recreated and we should reset too.
	 */
	for (curcpu = 0; curcpu < nprocs; curcpu++) {
		if (!cg_cpu_usage[curcpu].online)
			continue;

		if (cg_cpu_usage[curcpu].user < stat_node->usage[curcpu].user)
			reset_proc_stat_node(stat_node, cg_cpu_usage, nprocs);

		break;
	}

	total_sum = diff_cpu_usage(stat_node->usage, cg_cpu_usage, diff, nprocs);
	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
		stat_node->usage[curcpu].online = cg_cpu_usage[curcpu].online;

		if (!stat_node->usage[curcpu].online)
			continue;

		i++;

		stat_node->usage[curcpu].user += diff[curcpu].user;
		stat_node->usage[curcpu].system += diff[curcpu].system;
		stat_node->usage[curcpu].idle += diff[curcpu].idle;

		if (max_cpus > 0 && i >= max_cpus) {
			user_surplus += diff[curcpu].user;
			system_surplus += diff[curcpu].system;
		}
	}
	/* Calculate usage counters of visible CPUs */
	if (max_cpus > 0) {
		uint64_t diff_user = 0;
		uint64_t diff_system = 0;
		uint64_t diff_idle = 0;
		uint64_t max_diff_idle = 0;
		uint64_t max_diff_idle_index = 0;
		double exact_cpus;

		/* threshold = maximum usage per cpu, including idle */
		threshold = total_sum / cpu_cnt * max_cpus;
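		/*
		 * Worked example (illustrative, not from the original source):
		 * if the online CPUs accumulated total_sum = 1000 ticks over
		 * cpu_cnt = 4 CPUs while the quota allows max_cpus = 2, the
		 * threshold is 1000 / 4 * 2 = 500 ticks; a visible CPU whose
		 * user + system delta already reaches 500 gets none of the
		 * surplus redistributed by add_cpu_usage() below.
		 */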
		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			i++;

			if (i == max_cpus)
				break;

			if (diff[curcpu].user + diff[curcpu].system >= threshold)
				continue;

			add_cpu_usage(&user_surplus, &diff[curcpu],
				      &diff[curcpu].user, threshold);

			if (diff[curcpu].user + diff[curcpu].system >= threshold)
				continue;

			/* If there is still room, add system */
			add_cpu_usage(&system_surplus, &diff[curcpu],
				      &diff[curcpu].system, threshold);
		}

		if (user_surplus > 0)
			lxcfs_debug("leftover user: %lu for %s\n", user_surplus, cg);
		if (system_surplus > 0)
			lxcfs_debug("leftover system: %lu for %s\n", system_surplus, cg);
		for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			i++;

			if (i == max_cpus)
				break;

			stat_node->view[curcpu].user += diff[curcpu].user;
			stat_node->view[curcpu].system += diff[curcpu].system;
			stat_node->view[curcpu].idle += diff[curcpu].idle;

			user_sum += stat_node->view[curcpu].user;
			system_sum += stat_node->view[curcpu].system;
			idle_sum += stat_node->view[curcpu].idle;

			diff_user += diff[curcpu].user;
			diff_system += diff[curcpu].system;
			diff_idle += diff[curcpu].idle;
			if (diff[curcpu].idle > max_diff_idle) {
				max_diff_idle = diff[curcpu].idle;
				max_diff_idle_index = curcpu;
			}

			lxcfs_v("curcpu: %d, diff_user: %lu, diff_system: %lu, diff_idle: %lu\n", curcpu, diff[curcpu].user, diff[curcpu].system, diff[curcpu].idle);
		}
		lxcfs_v("total. diff_user: %lu, diff_system: %lu, diff_idle: %lu\n", diff_user, diff_system, diff_idle);
		/* revise cpu usage view to support partial cpu case. */
		exact_cpus = exact_cpu_count(cg);
		if (exact_cpus < (double)max_cpus) {
			unsigned long delta = (unsigned long)((double)(diff_user + diff_system + diff_idle) * (1 - exact_cpus / (double)max_cpus));

			lxcfs_v("revising cpu usage view to match the exact cpu count [%f]\n", exact_cpus);
			lxcfs_v("delta: %lu\n", delta);
			lxcfs_v("idle_sum before: %lu\n", idle_sum);
			idle_sum = idle_sum > delta ? idle_sum - delta : 0;
			lxcfs_v("idle_sum after: %lu\n", idle_sum);

			curcpu = max_diff_idle_index;
			lxcfs_v("curcpu: %d, idle before: %lu\n", curcpu, stat_node->view[curcpu].idle);
			stat_node->view[curcpu].idle = stat_node->view[curcpu].idle > delta ? stat_node->view[curcpu].idle - delta : 0;
			lxcfs_v("curcpu: %d, idle after: %lu\n", curcpu, stat_node->view[curcpu].idle);
		}
	} else {
		for (curcpu = 0; curcpu < nprocs; curcpu++) {
			if (!stat_node->usage[curcpu].online)
				continue;

			stat_node->view[curcpu].user = stat_node->usage[curcpu].user;
			stat_node->view[curcpu].system = stat_node->usage[curcpu].system;
			stat_node->view[curcpu].idle = stat_node->usage[curcpu].idle;

			user_sum += stat_node->view[curcpu].user;
			system_sum += stat_node->view[curcpu].system;
			idle_sum += stat_node->view[curcpu].idle;
		}
	}
	/* Render the file */
	l = snprintf(buf, buf_size,
		     "cpu %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
		     user_sum, system_sum, idle_sum);
	lxcfs_v("cpu-all: %s\n", buf);
	if (l < 0) {
		perror("Error writing to cache");
		total_len = 0;
		goto out;
	}
	if (l >= buf_size) {
		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
		total_len = 0;
		goto out;
	}

	buf += l;
	buf_size -= l;
	total_len += l;

	/* Render visible CPUs */
	for (curcpu = 0, i = -1; curcpu < nprocs; curcpu++) {
		if (!stat_node->usage[curcpu].online)
			continue;

		i++;

		if (max_cpus > 0 && i == max_cpus)
			break;

		l = snprintf(buf, buf_size, "cpu%d %" PRIu64 " 0 %" PRIu64 " %" PRIu64 " 0 0 0 0 0 0\n",
			     i,
			     stat_node->view[curcpu].user,
			     stat_node->view[curcpu].system,
			     stat_node->view[curcpu].idle);
		lxcfs_v("cpu: %s\n", buf);
		if (l < 0) {
			perror("Error writing to cache");
			total_len = 0;
			goto out;
		}
		if (l >= buf_size) {
			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
			total_len = 0;
			goto out;
		}

		buf += l;
		buf_size -= l;
		total_len += l;
	}

	/* Pass the rest of /proc/stat, start with the last line read */
	l = snprintf(buf, buf_size, "%s", line);
	if (l < 0) {
		perror("Error writing to cache");
		total_len = 0;
		goto out;
	}
	if (l >= buf_size) {
		lxcfs_error("%s\n", "Internal error: truncated write to cache.");
		total_len = 0;
		goto out;
	}

	buf += l;
	buf_size -= l;
	total_len += l;

	/* Pass the rest of the host's /proc/stat */
	while (getline(&line, &linelen, f) != -1) {
		l = snprintf(buf, buf_size, "%s", line);
		if (l < 0) {
			perror("Error writing to cache");
			total_len = 0;
			goto out;
		}
		if (l >= buf_size) {
			lxcfs_error("%s\n", "Internal error: truncated write to cache.");
			total_len = 0;
			goto out;
		}

		buf += l;
		buf_size -= l;
		total_len += l;
	}

out:
	pthread_mutex_unlock(&stat_node->lock);
	return total_len;
}
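/*
 * Illustrative rendering (not from the original file): for a container
 * limited to two visible CPUs the cache starts roughly like
 *
 *	cpu 1234 0 567 89012 0 0 0 0 0 0
 *	cpu0 620 0 280 44500 0 0 0 0 0 0
 *	cpu1 614 0 287 44512 0 0 0 0 0 0
 *
 * followed by the remaining host /proc/stat lines (intr, ctxt, btime, ...),
 * which are passed through unchanged.
 */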
/*
 * Check whether this is a '^processor' line in /proc/cpuinfo.
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) == 1)
		return true;

	return false;
}

static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) != 1)
		return false;

	return cpu_in_cpuset(cpu, cpuset);
}
int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
		      struct fuse_file_info *fi)
{
	__do_free char *cg = NULL, *cpuset = NULL, *line = NULL;
	__do_free void *fopen_cache = NULL;
	__do_fclose FILE *f = NULL;
	struct fuse_context *fc = fuse_get_context();
	struct lxcfs_opts *opts = (struct lxcfs_opts *)fc->private_data;
	struct file_info *d = INTTYPE_TO_PTR(fi->fh);
	size_t linelen = 0, total_len = 0;
	bool am_printing = false, firstline = true, is_s390x = false;
	int curcpu = -1, cpu, max_cpus = 0;
	bool use_view;
	char *cache = d->buf;
	size_t cache_size = d->buflen;

	if (offset) {
		size_t left;

		if (offset > d->size)
			return -EINVAL;

		left = d->size - offset;
		total_len = left > size ? size : left;
		memcpy(buf, cache + offset, total_len);

		return total_len;
	}
	pid_t initpid = lookup_initpid_in_store(fc->pid);
	if (initpid <= 1 || is_shared_pidns(initpid))
		initpid = fc->pid;

	cg = get_pid_cgroup(initpid, "cpuset");
	if (!cg)
		return read_file_fuse("proc/cpuinfo", buf, size, d);
	prune_init_slice(cg);

	cpuset = get_cpuset(cg);
	if (!cpuset)
		return 0;

	if (cgroup_ops->can_use_cpuview(cgroup_ops) && opts->use_cfs)
		use_view = true;
	else
		use_view = false;

	if (use_view)
		max_cpus = max_cpu_count(cg);

	f = fopen_cached("/proc/cpuinfo", "re", &fopen_cache);
	if (!f)
		return 0;
	while (getline(&line, &linelen, f) != -1) {
		ssize_t l;

		if (firstline) {
			firstline = false;

			if (strstr(line, "IBM/S390") != NULL) {
				is_s390x = true;
				am_printing = true;
				continue;
			}
		}

		if (strncmp(line, "# processors:", 12) == 0)
			continue;

		if (is_processor_line(line)) {
			if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus)
				break;

			am_printing = cpuline_in_cpuset(line, cpuset);
			if (am_printing) {
				curcpu++;

				l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
				if (l < 0) {
					perror("Error writing to cache");
					return 0;
				}
				if (l >= cache_size) {
					lxcfs_error("%s\n", "Internal error: truncated write to cache.");
					return 0;
				}

				cache += l;
				cache_size -= l;
				total_len += l;
			}
			continue;
		} else if (is_s390x && sscanf(line, "processor %d:", &cpu) == 1) {
			char *p;

			if (use_view && max_cpus > 0 && (curcpu + 1) == max_cpus)
				break;

			if (!cpu_in_cpuset(cpu, cpuset))
				continue;

			curcpu++;

			p = strchr(line, ':');
			if (!p || !*p)
				return 0;
			p++;

			l = snprintf(cache, cache_size, "processor %d:%s", curcpu, p);
			if (l < 0) {
				perror("Error writing to cache");
				return 0;
			}
			if (l >= cache_size) {
				lxcfs_error("%s\n", "Internal error: truncated write to cache.");
				return 0;
			}

			cache += l;
			cache_size -= l;
			total_len += l;
			continue;
		}

		if (am_printing) {
			l = snprintf(cache, cache_size, "%s", line);
			if (l < 0) {
				perror("Error writing to cache");
				return 0;
			}
			if (l >= cache_size) {
				lxcfs_error("%s\n", "Internal error: truncated write to cache.");
				return 0;
			}

			cache += l;
			cache_size -= l;
			total_len += l;
		}
	}
	if (is_s390x) {
		__do_free char *origcache = d->buf;
		ssize_t l;

		d->buf = malloc(d->buflen);
		if (!d->buf) {
			d->buf = move_ptr(origcache);
			return 0;
		}

		cache = d->buf;
		cache_size = d->buflen;
		total_len = 0;

		l = snprintf(cache, cache_size, "vendor_id : IBM/S390\n");
		if (l < 0 || l >= cache_size)
			return 0;

		cache_size -= l;
		cache += l;
		total_len += l;

		l = snprintf(cache, cache_size, "# processors : %d\n", curcpu + 1);
		if (l < 0 || l >= cache_size)
			return 0;

		cache_size -= l;
		cache += l;
		total_len += l;

		l = snprintf(cache, cache_size, "%s", origcache);
		if (l < 0 || l >= cache_size)
			return 0;
		total_len += l;
	}

	d->cached = 1;
	d->size = total_len;
	if (total_len > size)
		total_len = size;

	/* read from off 0 */
	memcpy(buf, d->buf, total_len);

	return total_len;
}
/*
 * Returns 0 on success.
 * It is the caller's responsibility to free `return_usage`, unless this
 * function returns an error.
 */
int read_cpuacct_usage_all(char *cg, char *cpuset,
			   struct cpuacct_usage **return_usage, int *size)
{
	__do_free char *usage_str = NULL;
	__do_free struct cpuacct_usage *cpu_usage = NULL;
	int cpucount = get_nprocs_conf();
	int i = 0, j = 0, read_pos = 0, read_cnt = 0;
	int ret, cg_cpu;
	uint64_t cg_user, cg_system;
	int64_t ticks_per_sec;

	ticks_per_sec = sysconf(_SC_CLK_TCK);

	if (ticks_per_sec < 0 && errno == EINVAL) {
		lxcfs_v("%s\n",
			"read_cpuacct_usage_all failed to determine number of clock ticks "
			"in a second");
		return -1;
	}

	cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount);
	if (!cpu_usage)
		return -ENOMEM;

	memset(cpu_usage, 0, sizeof(struct cpuacct_usage) * cpucount);

	if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_all", &usage_str)) {
		char *data = NULL;
		size_t sz = 0, asz = 0;

		/* read cpuacct.usage_percpu instead. */
		lxcfs_v("failed to read cpuacct.usage_all. reading cpuacct.usage_percpu instead\n%s", "");
		if (!cgroup_ops->get(cgroup_ops, "cpuacct", cg, "cpuacct.usage_percpu", &usage_str))
			return -1;
		lxcfs_v("usage_str: %s\n", usage_str);

		/* convert cpuacct.usage_percpu into cpuacct.usage_all. */
		lxcfs_v("converting cpuacct.usage_percpu into cpuacct.usage_all\n%s", "");

		must_strcat(&data, &sz, &asz, "cpu user system\n");

		while (sscanf(usage_str + read_pos, "%" PRIu64 " %n", &cg_user, &read_cnt) > 0) {
			lxcfs_debug("i: %d, cg_user: %" PRIu64 ", read_pos: %d, read_cnt: %d\n", i, cg_user, read_pos, read_cnt);
			must_strcat(&data, &sz, &asz, "%d %lu 0\n", i, cg_user);
			i++;
			read_pos += read_cnt;
		}

		free(usage_str);
		usage_str = data;
		read_pos = 0;

		lxcfs_v("usage_str: %s\n", usage_str);
	}
	if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) {
		lxcfs_error("read_cpuacct_usage_all reading first line from "
			    "%s/cpuacct.usage_all failed.\n", cg);
		return -1;
	}

	read_pos += read_cnt;

	for (i = 0, j = 0; i < cpucount; i++) {
		ret = sscanf(usage_str + read_pos,
			     "%d %" PRIu64 " %" PRIu64 "\n%n", &cg_cpu,
			     &cg_user, &cg_system, &read_cnt);

		if (ret == EOF)
			break;

		if (ret != 3) {
			lxcfs_error("read_cpuacct_usage_all reading from %s/cpuacct.usage_all "
				    "failed.\n", cg);
			return -1;
		}

		read_pos += read_cnt;

		/* Convert the time from nanoseconds to USER_HZ */
		cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec;
		cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec;
		j++;
	}

	*return_usage = move_ptr(cpu_usage);
	*size = cpucount;

	return 0;
}
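/*
 * Worked example (illustrative): with ticks_per_sec = 100, a cpuacct value of
 * 2,500,000,000 ns is 2.5 s and becomes 2.5 * 100 = 250 ticks in
 * cpu_usage[j].user.
 */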
static bool cpuview_init_head(struct cg_proc_stat_head **head)
{
	*head = malloc(sizeof(struct cg_proc_stat_head));
	if (!(*head)) {
		lxcfs_error("%s\n", strerror(errno));
		return false;
	}

	(*head)->lastcheck = time(NULL);
	(*head)->next = NULL;

	if (pthread_rwlock_init(&(*head)->lock, NULL) != 0) {
		lxcfs_error("%s\n", "Failed to initialize list lock");
		free_disarm(*head);
		return false;
	}

	return true;
}
bool init_cpuview(void)
{
	int i;

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++)
		proc_stat_history[i] = NULL;

	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		if (!cpuview_init_head(&proc_stat_history[i]))
			goto err;
	}

	return true;

err:
	for (i = 0; i < CPUVIEW_HASH_SIZE; i++) {
		if (proc_stat_history[i])
			free_disarm(proc_stat_history[i]);
	}

	return false;
}
static void cpuview_free_head(struct cg_proc_stat_head *head)
{
	struct cg_proc_stat *node, *tmp;

	if (head->next) {
		node = head->next;

		do {
			tmp = node;
			node = node->next;
			free_proc_stat_node(tmp);
		} while (node);
	}

	pthread_rwlock_destroy(&head->lock);
	free_disarm(head);
}
void free_cpuview(void)
{
	for (int i = 0; i < CPUVIEW_HASH_SIZE; i++)
		if (proc_stat_history[i])
			cpuview_free_head(proc_stat_history[i]);
}