]> git.proxmox.com Git - mirror_lxcfs.git/commitdiff
Per-container CPU usage in /proc/stat
authorJakub Skokan <jakub.skokan@havefun.cz>
Mon, 18 Jun 2018 08:50:02 +0000 (10:50 +0200)
committerJakub Skokan <jakub.skokan@havefun.cz>
Tue, 19 Jun 2018 16:42:54 +0000 (18:42 +0200)
Containers can see utilization of all available CPUs, even if the CPU
is utilized by other containers or by the host. The contents
of `/proc/stat` is shared across the system, except for hiding CPUs
excluded by cpuset. This commit attempts to fix that, but at a cost.
CPU usage is read from cpuacct cgroup, but that accounts only for
`user` and `system` fields from `/proc/stat`. Idle time can be
calculated, but other fields cannot, thus are always set to 0.
Additionally, idle time is based on values from the host, so even
a freshly started container has large initial idle time.

If the cpuacct cgroup is not present, or reading from it fails, LXCFS
will return CPU usage stats from the host as before.

Signed-off-by: Jakub Skokan <jakub.skokan@havefun.cz>
bindings.c

index 6ab1bdbdb6580485cbb85e30a1ec059893e2aa90..4b6f51b07377eeb243b8233ffbea498e713153ae 100644 (file)
@@ -80,6 +80,11 @@ struct file_info {
        int cached;
 };
 
+struct cpuacct_usage {
+       uint64_t user;
+       uint64_t system;
+};
+
 /* The function of hash table.*/
 #define LOAD_SIZE 100 /*the size of hash_table */
 #define FLUSH_TIME 5  /*the flush rate */
@@ -3794,6 +3799,89 @@ static uint64_t get_reaper_age(pid_t pid)
        return procage;
 }
 
+/*
+ * Returns 0 on success.
+ * It is the caller's responsibility to free `return_usage`, unless this
+ * function returns an error.
+ */
+static int read_cpuacct_usage_all(char *cg, char *cpuset, struct cpuacct_usage **return_usage)
+{
+       int cpucount = get_nprocs();
+       struct cpuacct_usage *cpu_usage;
+       int rv = 0, i, j, ret, read_pos = 0, read_cnt;
+       int cg_cpu;
+       uint64_t cg_user, cg_system;
+       int64_t ticks_per_sec;
+       char *usage_str = NULL;
+
+       ticks_per_sec = sysconf(_SC_CLK_TCK);
+
+       if (ticks_per_sec < 0 && errno == EINVAL) {
+               lxcfs_debug(
+                       "%s\n",
+                       "read_cpuacct_usage_all failed to determine number of clock ticks "
+                       "in a second");
+               return -1;
+       }
+
+       cpu_usage = malloc(sizeof(struct cpuacct_usage) * cpucount);
+       if (!cpu_usage)
+               return -ENOMEM;
+
+       if (!cgfs_get_value("cpuacct", cg, "cpuacct.usage_all", &usage_str)) {
+               rv = -1;
+               goto err;
+       }
+
+       if (sscanf(usage_str, "cpu user system\n%n", &read_cnt) != 0) {
+               lxcfs_error("read_cpuacct_usage_all reading first line from "
+                               "%s/cpuacct.usage_all failed.\n", cg);
+               rv = -1;
+               goto err;
+       }
+
+       read_pos += read_cnt;
+
+       for (i = 0, j = 0; i < cpucount; i++) {
+               ret = sscanf(usage_str + read_pos, "%d %lu %lu\n%n", &cg_cpu, &cg_user,
+                               &cg_system, &read_cnt);
+
+               if (ret == EOF)
+                       break;
+
+               if (ret != 3) {
+                       lxcfs_error("read_cpuacct_usage_all reading from %s/cpuacct.usage_all "
+                                       "failed.\n", cg);
+                       rv = -1;
+                       goto err;
+               }
+
+               read_pos += read_cnt;
+
+               if (!cpu_in_cpuset(i, cpuset))
+                       continue;
+
+               /* Convert the time from nanoseconds to USER_HZ */
+               cpu_usage[j].user = cg_user / 1000.0 / 1000 / 1000 * ticks_per_sec;
+               cpu_usage[j].system = cg_system / 1000.0 / 1000 / 1000 * ticks_per_sec;
+               j++;
+       }
+
+       rv = 0;
+       *return_usage = cpu_usage;
+
+err:
+       if (usage_str)
+               free(usage_str);
+
+       if (rv != 0) {
+               free(cpu_usage);
+               *return_usage = NULL;
+       }
+
+       return rv;
+}
+
 #define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
 static int proc_stat_read(char *buf, size_t size, off_t offset,
                struct fuse_file_info *fi)
@@ -3813,6 +3901,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
        char *cache = d->buf + CPUALL_MAX_SIZE;
        size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
        FILE *f = NULL;
+       struct cpuacct_usage *cg_cpu_usage = NULL;
 
        if (offset){
                if (offset > d->size)
@@ -3837,6 +3926,16 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
        if (!cpuset)
                goto err;
 
+       /*
+        * Read cpuacct.usage_all for all CPUs.
+        * If the cpuacct cgroup is present, it is used to calculate the container's
+        * CPU usage. If not, values from the host's /proc/stat are used.
+        */
+       if (read_cpuacct_usage_all(cg, cpuset, &cg_cpu_usage) != 0) {
+               lxcfs_debug("%s\n", "proc_stat_read failed to read from cpuacct, "
+                               "falling back to the host's /proc/stat");
+       }
+
        f = fopen("/proc/stat", "r");
        if (!f)
                goto err;
@@ -3852,6 +3951,8 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
                int cpu;
                char cpu_char[10]; /* That's a lot of cores */
                char *c;
+               uint64_t all_used, cg_used, new_idle;
+               int ret;
 
                if (strlen(line) == 0)
                        continue;
@@ -3880,27 +3981,7 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
                        continue;
                curcpu ++;
 
-               c = strchr(line, ' ');
-               if (!c)
-                       continue;
-               l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
-               if (l < 0) {
-                       perror("Error writing to cache");
-                       rv = 0;
-                       goto err;
-
-               }
-               if (l >= cache_size) {
-                       lxcfs_error("%s\n", "Internal error: truncated write to cache.");
-                       rv = 0;
-                       goto err;
-               }
-
-               cache += l;
-               cache_size -= l;
-               total_len += l;
-
-               if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
+               ret = sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu",
                           &user,
                           &nice,
                           &system,
@@ -3910,18 +3991,83 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
                           &softirq,
                           &steal,
                           &guest,
-                          &guest_nice) != 10)
-                       continue;
-               user_sum += user;
-               nice_sum += nice;
-               system_sum += system;
-               idle_sum += idle;
-               iowait_sum += iowait;
-               irq_sum += irq;
-               softirq_sum += softirq;
-               steal_sum += steal;
-               guest_sum += guest;
-               guest_nice_sum += guest_nice;
+                          &guest_nice);
+
+               if (ret != 10 || !cg_cpu_usage) {
+                       c = strchr(line, ' ');
+                       if (!c)
+                               continue;
+                       l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
+                       if (l < 0) {
+                               perror("Error writing to cache");
+                               rv = 0;
+                               goto err;
+
+                       }
+                       if (l >= cache_size) {
+                               lxcfs_error("%s\n", "Internal error: truncated write to cache.");
+                               rv = 0;
+                               goto err;
+                       }
+
+                       cache += l;
+                       cache_size -= l;
+                       total_len += l;
+
+                       if (ret != 10)
+                               continue;
+               }
+
+               if (cg_cpu_usage) {
+                       all_used = user + nice + system + iowait + irq + softirq + steal + guest + guest_nice;
+                       cg_used = cg_cpu_usage[curcpu].user + cg_cpu_usage[curcpu].system;
+
+                       if (all_used >= cg_used) {
+                               new_idle = idle + (all_used - cg_used);
+
+                       } else {
+                               lxcfs_error("cpu%d from %s has unexpected cpu time: %lu in /proc/stat, "
+                                               "%lu in cpuacct.usage_all; unable to determine idle time\n",
+                                               curcpu, cg, all_used, cg_used);
+                               new_idle = idle;
+                       }
+
+                       l = snprintf(cache, cache_size, "cpu%d %lu 0 %lu %lu 0 0 0 0 0 0\n",
+                                       curcpu, cg_cpu_usage[curcpu].user, cg_cpu_usage[curcpu].system,
+                                       new_idle);
+
+                       if (l < 0) {
+                               perror("Error writing to cache");
+                               rv = 0;
+                               goto err;
+
+                       }
+                       if (l >= cache_size) {
+                               lxcfs_error("%s\n", "Internal error: truncated write to cache.");
+                               rv = 0;
+                               goto err;
+                       }
+
+                       cache += l;
+                       cache_size -= l;
+                       total_len += l;
+
+                       user_sum += cg_cpu_usage[curcpu].user;
+                       system_sum += cg_cpu_usage[curcpu].system;
+                       idle_sum += new_idle;
+
+               } else {
+                       user_sum += user;
+                       nice_sum += nice;
+                       system_sum += system;
+                       idle_sum += idle;
+                       iowait_sum += iowait;
+                       irq_sum += irq;
+                       softirq_sum += softirq;
+                       steal_sum += steal;
+                       guest_sum += guest;
+                       guest_nice_sum += guest_nice;
+               }
        }
 
        cache = d->buf;
@@ -3959,6 +4105,8 @@ static int proc_stat_read(char *buf, size_t size, off_t offset,
 err:
        if (f)
                fclose(f);
+       if (cg_cpu_usage)
+               free(cg_cpu_usage);
        free(line);
        free(cpuset);
        free(cg);