From: Jakub Skokan Date: Mon, 18 Jun 2018 08:50:02 +0000 (+0200) Subject: Per-container CPU usage in /proc/stat X-Git-Tag: lxcfs-3.1.0~21^2 X-Git-Url: https://git.proxmox.com/?p=mirror_lxcfs.git;a=commitdiff_plain;h=8be92dd19ceecb0ab54f36cc69d5f3845ba29844;hp=1c9fc36f1a81f2142d5f641e83bb8146752936b5 Per-container CPU usage in /proc/stat Containers can see utilization of all available CPUs, even if the CPU is utilized by other containers or by the host. The contents of `/proc/stat` is shared across the system, except for hiding CPUs excluded by cpuset. This commit attempts to fix that, but at a cost. CPU usage is read from cpuacct cgroup, but that accounts only for `user` and `system` fields from `/proc/stat`. Idle time can be calculated, but other fields cannot, thus are always set to 0. Additionally, idle time is based on values from the host, so even a freshly started container has large initial idle time. If the cpuacct cgroup is not present, or reading from it fails, LXCFS will return CPU usage stats from the host as before. Signed-off-by: Jakub Skokan --- diff --git a/bindings.c b/bindings.c index 6ab1bdb..4b6f51b 100644 --- a/bindings.c +++ b/bindings.c @@ -80,6 +80,11 @@ struct file_info { int cached; }; +struct cpuacct_usage { + uint64_t user; + uint64_t system; +}; + /* The function of hash table.*/ #define LOAD_SIZE 100 /*the size of hash_table */ #define FLUSH_TIME 5 /*the flush rate */ @@ -3794,6 +3799,89 @@ static uint64_t get_reaper_age(pid_t pid) return procage; } +/* + * Returns 0 on success. + * It is the caller's responsibility to free `return_usage`, unless this + * function returns an error. + */ +static int read_cpuacct_usage_all(char *cg, char *cpuset, struct cpuacct_usage **return_usage) +{ + int cpucount = get_nprocs(); + struct cpuacct_usage *cpu_usage; + int rv = 0, i, j, ret, read_pos = 0, read_cnt; + int cg_cpu; + uint64_t cg_user, cg_system; + int64_t ticks_per_sec; + char *usage_str = NULL; + + ticks_per_sec = sysconf(_SC_CLK_TCK); + + if (ticks_per_sec < 0 && errno == EINVAL) { + lxcfs_debug( + "%s\n", + "read_cpuacct_usage_all failed to determine number of clock ticks " + "in a second"); + return -1; + } + + cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount); + if (!cpu_usage) + return -ENOMEM; + + if (!cgfs_get_value("cpuacct", cg, "cpuacct.usage_all", &usage_str)) { + rv = -1; + goto err; + } + + if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) { + lxcfs_error("read_cpuacct_usage_all reading first line from " + "%s/cpuacct.usage_all failed.\n", cg); + rv = -1; + goto err; + } + + read_pos += read_cnt; + + for (i = 0, j = 0; i < cpucount; i++) { + ret = sscanf(usage_str + read_pos, "%d %lu %lu\n%n", &cg_cpu, &cg_user, + &cg_system, &read_cnt); + + if (ret == EOF) + break; + + if (ret != 3) { + lxcfs_error("read_cpuacct_usage_all reading from %s/cpuacct.usage_all " + "failed.\n", cg); + rv = -1; + goto err; + } + + read_pos += read_cnt; + + if (!cpu_in_cpuset(i, cpuset)) + continue; + + /* Convert the time from nanoseconds to USER_HZ */ + cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec; + cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec; + j++; + } + + rv = 0; + *return_usage = cpu_usage; + +err: + if (usage_str) + free(usage_str); + + if (rv != 0) { + free(cpu_usage); + *return_usage = NULL; + } + + return rv; +} + #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2) static int proc_stat_read(char *buf, size_t size, off_t offset, struct fuse_file_info *fi) @@ -3813,6 +3901,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, char *cache = d->buf + CPUALL_MAX_SIZE; size_t cache_size = d->buflen - CPUALL_MAX_SIZE; FILE *f = NULL; + struct cpuacct_usage *cg_cpu_usage = NULL; if (offset){ if (offset > d->size) @@ -3837,6 +3926,16 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, if (!cpuset) goto err; + /* + * Read cpuacct.usage_all for all CPUs. + * If the cpuacct cgroup is present, it is used to calculate the container's + * CPU usage. If not, values from the host's /proc/stat are used. + */ + if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage) != 0) { + lxcfs_debug("%s\n", "proc_stat_read failed to read from cpuacct, " + "falling back to the host's /proc/stat"); + } + f = fopen("/proc/stat", "r"); if (!f) goto err; @@ -3852,6 +3951,8 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, int cpu; char cpu_char[10]; /* That's a lot of cores */ char *c; + uint64_t all_used, cg_used, new_idle; + int ret; if (strlen(line) == 0) continue; @@ -3880,27 +3981,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, continue; curcpu ++; - c = strchr(line, ' '); - if (!c) - continue; - l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c); - if (l < 0) { - perror("Error writing to cache"); - rv = 0; - goto err; - - } - if (l >= cache_size) { - lxcfs_error("%s\n", "Internal error: truncated write to cache."); - rv = 0; - goto err; - } - - cache += l; - cache_size -= l; - total_len += l; - - if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", + ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, @@ -3910,18 +3991,83 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, &softirq, &steal, &guest, - &guest_nice) != 10) - continue; - user_sum += user; - nice_sum += nice; - system_sum += system; - idle_sum += idle; - iowait_sum += iowait; - irq_sum += irq; - softirq_sum += softirq; - steal_sum += steal; - guest_sum += guest; - guest_nice_sum += guest_nice; + &guest_nice); + + if (ret != 10 || !cg_cpu_usage) { + c = strchr(line, ' '); + if (!c) + continue; + l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c); + if (l < 0) { + perror("Error writing to cache"); + rv = 0; + goto err; + + } + if (l >= cache_size) { + lxcfs_error("%s\n", "Internal error: truncated write to cache."); + rv = 0; + goto err; + } + + cache += l; + cache_size -= l; + total_len += l; + + if (ret != 10) + continue; + } + + if (cg_cpu_usage) { + all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice; + cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system; + + if (all_used >= cg_used) { + new_idle = idle + (all_used - cg_used); + + } else { + lxcfs_error("cpu%d from %s has unexpected cpu time: %lu in /proc/stat, " + "%lu in cpuacct.usage_all; unable to determine idle time\n", + curcpu, cg, all_used, cg_used); + new_idle = idle; + } + + l = snprintf(cache, cache_size, "cpu%d %lu 0 %lu %lu 0 0 0 0 0 0\n", + curcpu, cg_cpu_usage[curcpu].user, cg_cpu_usage[curcpu].system, + new_idle); + + if (l < 0) { + perror("Error writing to cache"); + rv = 0; + goto err; + + } + if (l >= cache_size) { + lxcfs_error("%s\n", "Internal error: truncated write to cache."); + rv = 0; + goto err; + } + + cache += l; + cache_size -= l; + total_len += l; + + user_sum += cg_cpu_usage[curcpu].user; + system_sum += cg_cpu_usage[curcpu].system; + idle_sum += new_idle; + + } else { + user_sum += user; + nice_sum += nice; + system_sum += system; + idle_sum += idle; + iowait_sum += iowait; + irq_sum += irq; + softirq_sum += softirq; + steal_sum += steal; + guest_sum += guest; + guest_nice_sum += guest_nice; + } } cache = d->buf; @@ -3959,6 +4105,8 @@ static int proc_stat_read(char *buf, size_t size, off_t offset, err: if (f) fclose(f); + if (cg_cpu_usage) + free(cg_cpu_usage); free(line); free(cpuset); free(cg);