]> git.proxmox.com Git - mirror_lxcfs.git/blame - src/proc_loadavg.c
Merge pull request #474 from brauner/2021-09-01.meson
[mirror_lxcfs.git] / src / proc_loadavg.c
CommitLineData
db0463bf 1/* SPDX-License-Identifier: LGPL-2.1+ */
1f5596dd
CB
2
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6
f834b6bf
SP
7#include "config.h"
8
1f5596dd
CB
9#define __STDC_FORMAT_MACROS
10#include <dirent.h>
11#include <errno.h>
12#include <fcntl.h>
1f5596dd
CB
13#include <inttypes.h>
14#include <libgen.h>
15#include <pthread.h>
16#include <sched.h>
17#include <stdarg.h>
18#include <stdbool.h>
19#include <stdint.h>
20#include <stdio.h>
21#include <stdlib.h>
22#include <string.h>
23#include <time.h>
24#include <unistd.h>
25#include <wait.h>
26#include <linux/magic.h>
27#include <linux/sched.h>
28#include <sys/epoll.h>
29#include <sys/mman.h>
30#include <sys/mount.h>
31#include <sys/param.h>
32#include <sys/socket.h>
33#include <sys/syscall.h>
34#include <sys/sysinfo.h>
35#include <sys/vfs.h>
36
e01afbb7
CB
37#include "proc_loadavg.h"
38
1f5596dd 39#include "bindings.h"
1f5596dd
CB
40#include "cgroup_fuse.h"
41#include "cgroups/cgroup.h"
42#include "cgroups/cgroup_utils.h"
43#include "memory_utils.h"
44#include "utils.h"
45
/*
 * Written by load_daemon() and consulted by proc_loadavg_read().
 * Non-zero: serve the per-cgroup load average.
 * Zero: pass the host's /proc/loadavg through unchanged.
 */
static int loadavg = 0;

/* Hash table parameters. */
#define LOAD_SIZE 100 /* number of hash buckets */
#define FLUSH_TIME 5 /* refresh period of the hash table, in seconds */
#define DEPTH_DIR 3 /* how many cgroup levels are descended per cgroup */

/* Fixed-point arithmetic used to compute the load averages. */
#define FSHIFT 11 /* nr of bits of precision */
#define FIXED_1 (1 << FSHIFT) /* 1.0 as fixed-point */
#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
#define EXP_5 2014 /* 1/exp(5sec/5min) */
#define EXP_15 2037 /* 1/exp(5sec/15min) */
#define LOAD_INT(x) ((x) >> FSHIFT) /* integer part */
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* two decimal digits */

/* Set to 1 by stop_load_daemon() to ask load_begin() to return. */
static volatile sig_atomic_t loadavg_stop = 0;
65
/* Load average record of one cgroup; an element of a hash bucket's list. */
struct load_node {
	/* cgroup path this record describes; heap string freed with the node */
	char *cg;
	/* fixed-point load averages, updated with EXP_1, EXP_5 and EXP_15 */
	uint64_t avenrun[3];
	/* tasks found in state R or D during the last refresh */
	unsigned int run_pid;
	/* tasks found during the last refresh */
	unsigned int total_pid;
	/* largest task id seen during the last refresh */
	unsigned int last_pid;
	/* The file descriptor of the mounted cgroup */
	int cfd;
	/* following node in the bucket, NULL at the tail */
	struct load_node *next;
	/*
	 * Address of the pointer that refers to this node: either the bucket
	 * head's next field or the previous node's next field.
	 */
	struct load_node **pre;
};
79
80struct load_head {
81 /*
82 * The lock is about insert load_node and refresh load_node.To the first
83 * load_node of each hash bucket, insert and refresh in this hash bucket is
84 * mutually exclusive.
85 */
86 pthread_mutex_t lock;
87 /*
88 * The rdlock is about read loadavg and delete load_node.To each hash
89 * bucket, read and delete is mutually exclusive. But at the same time, we
90 * allow paratactic read operation. This rdlock is at list level.
91 */
92 pthread_rwlock_t rdlock;
93 /*
94 * The rilock is about read loadavg and insert load_node.To the first
95 * load_node of each hash bucket, read and insert is mutually exclusive.
96 * But at the same time, we allow paratactic read operation.
97 */
98 pthread_rwlock_t rilock;
99 struct load_node *next;
100};
101
102static struct load_head load_hash[LOAD_SIZE]; /* hash table */
103
104/*
105 * locate_node() finds special node. Not return NULL means success.
106 * It should be noted that rdlock isn't unlocked at the end of code
107 * because this function is used to read special node. Delete is not
108 * allowed before read has ended.
109 * unlock rdlock only in proc_loadavg_read().
110 */
111static struct load_node *locate_node(char *cg, int locate)
112{
113 struct load_node *f = NULL;
114 int i = 0;
115
116 pthread_rwlock_rdlock(&load_hash[locate].rilock);
117 pthread_rwlock_rdlock(&load_hash[locate].rdlock);
118 if (load_hash[locate].next == NULL) {
119 pthread_rwlock_unlock(&load_hash[locate].rilock);
120 return f;
121 }
122 f = load_hash[locate].next;
123 pthread_rwlock_unlock(&load_hash[locate].rilock);
124 while (f && ((i = strcmp(f->cg, cg)) != 0))
125 f = f->next;
126 return f;
127}
128
129static void insert_node(struct load_node **n, int locate)
130{
131 struct load_node *f;
132
133 pthread_mutex_lock(&load_hash[locate].lock);
134 pthread_rwlock_wrlock(&load_hash[locate].rilock);
135 f = load_hash[locate].next;
136 load_hash[locate].next = *n;
137
138 (*n)->pre = &(load_hash[locate].next);
139 if (f)
140 f->pre = &((*n)->next);
141 (*n)->next = f;
142 pthread_mutex_unlock(&load_hash[locate].lock);
143 pthread_rwlock_unlock(&load_hash[locate].rilock);
144}
145
/*
 * Hash the NUL-terminated string @name with the ELF hash algorithm.
 *
 * Bytes are read as unsigned char so that names containing bytes >= 0x80
 * hash to the same value regardless of whether plain char is signed on the
 * target platform.
 *
 * Returns a non-negative value; the caller reduces it modulo the table size.
 */
int calc_hash(const char *name)
{
	unsigned int hash = 0;

	/* ELFHash algorithm. */
	for (const unsigned char *p = (const unsigned char *)name; *p; p++) {
		unsigned int high;

		hash = (hash << 4) + *p;
		/* Fold the top nibble back into the low bits, then clear it. */
		high = hash & 0xf0000000;
		if (high != 0)
			hash ^= (high >> 24);
		hash &= ~high;
	}

	return (hash & 0x7fffffff);
}
162
163int proc_loadavg_read(char *buf, size_t size, off_t offset,
164 struct fuse_file_info *fi)
165{
b7604bf9 166 __do_free char *cg = NULL;
1f5596dd 167 struct fuse_context *fc = fuse_get_context();
99b183fb 168 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1f5596dd 169 pid_t initpid;
4f18a602 170 ssize_t total_len = 0;
1f5596dd
CB
171 struct load_node *n;
172 int hash;
b7604bf9 173 int cfd;
1ba088ae 174 uint64_t a, b, c;
1f5596dd
CB
175
176 if (offset) {
3cf1e562 177 size_t left;
1f5596dd
CB
178
179 if (offset > d->size)
180 return -EINVAL;
181
182 if (!d->cached)
183 return 0;
184
185 left = d->size - offset;
186 total_len = left > size ? size : left;
cde2554c 187 memcpy(buf, d->buf + offset, total_len);
1f5596dd
CB
188
189 return total_len;
190 }
191 if (!loadavg)
192 return read_file_fuse("/proc/loadavg", buf, size, d);
193
194 initpid = lookup_initpid_in_store(fc->pid);
195 if (initpid <= 1 || is_shared_pidns(initpid))
196 initpid = fc->pid;
197
198 cg = get_pid_cgroup(initpid, "cpu");
199 if (!cg)
200 return read_file_fuse("/proc/loadavg", buf, size, d);
201
202 prune_init_slice(cg);
203 hash = calc_hash(cg) % LOAD_SIZE;
204 n = locate_node(cg, hash);
205
206 /* First time */
207 if (n == NULL) {
208 cfd = get_cgroup_fd("cpu");
3fe133df 209 if (cfd < 0) {
1f5596dd
CB
210 /*
211 * In locate_node() above, pthread_rwlock_unlock() isn't used
212 * because delete is not allowed before read has ended.
213 */
214 pthread_rwlock_unlock(&load_hash[hash].rdlock);
3fe133df 215 return read_file_fuse("/proc/loadavg", buf, size, d);
1f5596dd 216 }
b7604bf9 217
cb4bf06b 218 n = must_realloc(NULL, sizeof(struct load_node));
70f7563e 219 n->cg = move_ptr(cg);
1f5596dd
CB
220 n->avenrun[0] = 0;
221 n->avenrun[1] = 0;
222 n->avenrun[2] = 0;
223 n->run_pid = 0;
224 n->total_pid = 1;
225 n->last_pid = initpid;
226 n->cfd = cfd;
227 insert_node(&n, hash);
228 }
3fe133df
CB
229 a = n->avenrun[0] + (FIXED_1 / 200);
230 b = n->avenrun[1] + (FIXED_1 / 200);
231 c = n->avenrun[2] + (FIXED_1 / 200);
b7604bf9
CB
232 total_len = snprintf(d->buf, d->buflen,
233 "%lu.%02lu "
234 "%lu.%02lu "
235 "%lu.%02lu "
236 "%d/"
3fe133df 237 "%d "
b7604bf9
CB
238 "%d\n",
239 LOAD_INT(a),
240 LOAD_FRAC(a),
241 LOAD_INT(b),
242 LOAD_FRAC(b),
243 LOAD_INT(c),
244 LOAD_FRAC(c),
245 n->run_pid,
246 n->total_pid,
247 n->last_pid);
1f5596dd 248 pthread_rwlock_unlock(&load_hash[hash].rdlock);
b7604bf9
CB
249 if (total_len < 0 || total_len >= d->buflen)
250 return log_error(0, "Failed to write to cache");
251
1f5596dd
CB
252 d->size = (int)total_len;
253 d->cached = 1;
254
3cf1e562 255 if ((size_t)total_len > size)
1f5596dd 256 total_len = size;
1f5596dd 257
b7604bf9
CB
258 memcpy(buf, d->buf, total_len);
259 return total_len;
1f5596dd
CB
260}
261
262/*
263 * Find the process pid from cgroup path.
264 * eg:from /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find the process pid.
265 * @pid_buf : put pid to pid_buf.
266 * @dpath : the path of cgroup. eg: /docker/containerid or /docker/containerid/child-cgroup ...
267 * @depth : the depth of cgroup in container.
268 * @sum : return the number of pid.
269 * @cfd : the file descriptor of the mounted cgroup. eg: /sys/fs/cgroup/cpu
270 */
e771a80b 271static int calc_pid(char ***pid_buf, const char *rel_path, int depth, int sum, int cfd)
1f5596dd 272{
61ef3c5c 273 __do_free char *line = NULL, *path = NULL;
9b817e41 274 __do_free void *fdopen_cache = NULL;
05b7a16d 275 __do_close int fd = -EBADF;
1f5596dd
CB
276 __do_fclose FILE *f = NULL;
277 __do_closedir DIR *dir = NULL;
278 struct dirent *file;
279 size_t linelen = 0;
1f5596dd 280 int pd;
1f5596dd 281
e771a80b 282 fd = openat(cfd, rel_path, O_RDONLY | O_CLOEXEC);
1f5596dd
CB
283 if (fd < 0)
284 return sum;
285
3fe133df 286 dir = fdopendir(fd);
1f5596dd
CB
287 if (!dir)
288 return sum;
3fe133df
CB
289 /* Transfer ownership to fdopendir(). */
290 move_fd(fd);
1f5596dd
CB
291
292 while (((file = readdir(dir)) != NULL) && depth > 0) {
293 if (strcmp(file->d_name, ".") == 0)
294 continue;
295
296 if (strcmp(file->d_name, "..") == 0)
297 continue;
298
299 if (file->d_type == DT_DIR) {
e771a80b
CB
300 __do_free char *path_next = NULL;
301 path_next = must_make_path(rel_path, "/", file->d_name, NULL);
1f5596dd 302 pd = depth - 1;
e771a80b 303 sum = calc_pid(pid_buf, path_next, pd, sum, cfd);
1f5596dd
CB
304 }
305 }
306
e771a80b 307 path = must_make_path(rel_path, "/cgroup.procs", NULL);
3fe133df 308 fd = openat(cfd, path, O_RDONLY | O_CLOEXEC);
1f5596dd
CB
309 if (fd < 0)
310 return sum;
311
9b817e41 312 f = fdopen_cached(fd, "re", &fdopen_cache);
1f5596dd
CB
313 if (!f)
314 return sum;
315
316 while (getline(&line, &linelen, f) != -1) {
1f342679
CB
317 __do_free char *task_pid = NULL;
318 char **pid;
319
320 task_pid = strdup(line);
321 if (!task_pid)
322 return sum;
323
1f5596dd
CB
324 pid = realloc(*pid_buf, sizeof(char *) * (sum + 1));
325 if (!pid)
326 return sum;
327 *pid_buf = pid;
1f342679 328 *(*pid_buf + sum) = move_ptr(task_pid);
1f5596dd
CB
329 sum++;
330 }
331
332 return sum;
333}
334
335/*
336 * calc_load calculates the load according to the following formula:
337 * load1 = load0 * exp + active * (1 - exp)
338 *
339 * @load1: the new loadavg.
340 * @load0: the former loadavg.
341 * @active: the total number of running pid at this moment.
342 * @exp: the fixed-point defined in the beginning.
343 */
1ba088ae 344static uint64_t calc_load(uint64_t load, uint64_t exp, uint64_t active)
1f5596dd 345{
1ba088ae 346 uint64_t newload;
1f5596dd
CB
347
348 active = active > 0 ? active * FIXED_1 : 0;
349 newload = load * exp + active * (FIXED_1 - exp);
350 if (active >= load)
351 newload += FIXED_1 - 1;
352
353 return newload / FIXED_1;
354}
355
356/*
357 * Return 0 means that container p->cg is closed.
358 * Return -1 means that error occurred in refresh.
359 * Positive num equals the total number of pid.
360 */
3fe133df 361static int refresh_load(struct load_node *p, const char *path)
1f5596dd 362{
bef38939 363 char **idbuf = NULL;
3fe133df
CB
364 char proc_path[STRLITERALLEN("/proc//task//status") +
365 2 * INTTYPE_TO_STRLEN(pid_t) + 1];
1f5596dd
CB
366 int i, ret, run_pid = 0, total_pid = 0, last_pid = 0;
367 size_t linelen = 0;
368 int sum, length;
369 struct dirent *file;
370
3fe133df 371 idbuf = must_realloc(NULL, sizeof(char **));
1f5596dd
CB
372
373 sum = calc_pid(&idbuf, path, DEPTH_DIR, 0, p->cfd);
3fe133df 374 if (!sum)
1f5596dd
CB
375 goto out;
376
377 for (i = 0; i < sum; i++) {
378 __do_closedir DIR *dp = NULL;
379
acff9786 380 length = strlen(idbuf[i]) - 1;
1f5596dd 381 idbuf[i][length] = '\0';
3fe133df
CB
382
383 ret = snprintf(proc_path, sizeof(proc_path), "/proc/%s/task", idbuf[i]);
384 if (ret < 0 || (size_t)ret > sizeof(proc_path)) {
1f5596dd
CB
385 i = sum;
386 sum = -1;
3fe133df
CB
387 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
388 goto err_out;
1f5596dd
CB
389 }
390
391 dp = opendir(proc_path);
3fe133df
CB
392 if (!dp) {
393 lxcfs_error("Failed to open \"%s\"", proc_path);
394 continue;
395 }
b7604bf9 396
1f5596dd 397 while ((file = readdir(dp)) != NULL) {
c0e081ce 398 __do_free char *line = NULL;
1f5596dd
CB
399 __do_fclose FILE *f = NULL;
400
3fe133df 401 if (strcmp(file->d_name, ".") == 0)
1f5596dd 402 continue;
b7604bf9 403
3fe133df 404 if (strcmp(file->d_name, "..") == 0)
1f5596dd 405 continue;
b7604bf9 406
1f5596dd 407 total_pid++;
b7604bf9 408
3fe133df 409 /* We make the biggest pid become last_pid. */
1f5596dd
CB
410 ret = atof(file->d_name);
411 last_pid = (ret > last_pid) ? ret : last_pid;
412
3fe133df
CB
413 ret = snprintf(proc_path, sizeof(proc_path),
414 "/proc/%s/task/%s/status", idbuf[i], file->d_name);
415 if (ret < 0 || (size_t)ret > sizeof(proc_path)) {
1f5596dd
CB
416 i = sum;
417 sum = -1;
3fe133df
CB
418 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
419 goto err_out;
1f5596dd
CB
420 }
421
3fe133df
CB
422 f = fopen(proc_path, "re");
423 if (!f)
424 continue;
425
426 while (getline(&line, &linelen, f) != -1)
427 if ((line[0] == 'S') && (line[1] == 't'))
acff9786 428 break;
3fe133df
CB
429
430 if ((line[7] == 'R') || (line[7] == 'D'))
431 run_pid++;
1f5596dd
CB
432 }
433 }
3fe133df
CB
434
435 /* Calculate the loadavg. */
436 p->avenrun[0] = calc_load(p->avenrun[0], EXP_1, run_pid);
437 p->avenrun[1] = calc_load(p->avenrun[1], EXP_5, run_pid);
438 p->avenrun[2] = calc_load(p->avenrun[2], EXP_15, run_pid);
439 p->run_pid = run_pid;
440 p->total_pid = total_pid;
441 p->last_pid = last_pid;
1f5596dd
CB
442
443err_out:
444 for (; i > 0; i--)
b7604bf9 445 free(idbuf[i - 1]);
1f5596dd
CB
446out:
447 free(idbuf);
448 return sum;
449}
450
451/* Delete the load_node n and return the next node of it. */
452static struct load_node *del_node(struct load_node *n, int locate)
453{
454 struct load_node *g;
455
456 pthread_rwlock_wrlock(&load_hash[locate].rdlock);
457 if (n->next == NULL) {
458 *(n->pre) = NULL;
459 } else {
460 *(n->pre) = n->next;
461 n->next->pre = n->pre;
462 }
463 g = n->next;
464 free_disarm(n->cg);
465 free_disarm(n);
466 pthread_rwlock_unlock(&load_hash[locate].rdlock);
467 return g;
468}
469
470/*
471 * Traverse the hash table and update it.
472 */
473static void *load_begin(void *arg)
474{
475
2a5d697d 476 int first_node, sum;
1f5596dd 477 struct load_node *f;
1f5596dd
CB
478 clock_t time1, time2;
479
3fe133df 480 for (;;) {
1f5596dd
CB
481 if (loadavg_stop == 1)
482 return NULL;
483
484 time1 = clock();
3fe133df 485 for (int i = 0; i < LOAD_SIZE; i++) {
1f5596dd
CB
486 pthread_mutex_lock(&load_hash[i].lock);
487 if (load_hash[i].next == NULL) {
488 pthread_mutex_unlock(&load_hash[i].lock);
489 continue;
490 }
3fe133df 491
1f5596dd
CB
492 f = load_hash[i].next;
493 first_node = 1;
494 while (f) {
495 __do_free char *path = NULL;
496
2a5d697d 497 path = must_make_path_relative(f->cg, NULL);
1f5596dd
CB
498
499 sum = refresh_load(f, path);
500 if (sum == 0)
501 f = del_node(f, i);
502 else
2a5d697d
CB
503 f = f->next;
504
1f5596dd
CB
505 /* load_hash[i].lock locks only on the first node.*/
506 if (first_node == 1) {
507 first_node = 0;
508 pthread_mutex_unlock(&load_hash[i].lock);
509 }
510 }
511 }
512
513 if (loadavg_stop == 1)
514 return NULL;
515
516 time2 = clock();
2a5d697d
CB
517 usleep(FLUSH_TIME * 1000000 -
518 (int)((time2 - time1) * 1000000 / CLOCKS_PER_SEC));
1f5596dd
CB
519 }
520}
521
522/*
523 * init_load initialize the hash table.
524 * Return 0 on success, return -1 on failure.
525 */
526static int init_load(void)
527{
528 int i;
529 int ret;
530
531 for (i = 0; i < LOAD_SIZE; i++) {
532 load_hash[i].next = NULL;
533 ret = pthread_mutex_init(&load_hash[i].lock, NULL);
b7604bf9
CB
534 if (ret) {
535 lxcfs_error("Failed to initialize lock");
1f5596dd
CB
536 goto out3;
537 }
b7604bf9 538
1f5596dd 539 ret = pthread_rwlock_init(&load_hash[i].rdlock, NULL);
b7604bf9
CB
540 if (ret) {
541 lxcfs_error("Failed to initialize rdlock");
1f5596dd
CB
542 goto out2;
543 }
b7604bf9 544
1f5596dd 545 ret = pthread_rwlock_init(&load_hash[i].rilock, NULL);
b7604bf9
CB
546 if (ret) {
547 lxcfs_error("Failed to initialize rilock");
1f5596dd
CB
548 goto out1;
549 }
550 }
b7604bf9 551
1f5596dd 552 return 0;
b7604bf9 553
1f5596dd
CB
554out1:
555 pthread_rwlock_destroy(&load_hash[i].rdlock);
556out2:
557 pthread_mutex_destroy(&load_hash[i].lock);
558out3:
559 while (i > 0) {
560 i--;
561 pthread_mutex_destroy(&load_hash[i].lock);
562 pthread_rwlock_destroy(&load_hash[i].rdlock);
563 pthread_rwlock_destroy(&load_hash[i].rilock);
564 }
b7604bf9 565
1f5596dd
CB
566 return -1;
567}
568
569static void load_free(void)
570{
571 struct load_node *f, *p;
572
573 for (int i = 0; i < LOAD_SIZE; i++) {
574 pthread_mutex_lock(&load_hash[i].lock);
575 pthread_rwlock_wrlock(&load_hash[i].rilock);
576 pthread_rwlock_wrlock(&load_hash[i].rdlock);
577 if (load_hash[i].next == NULL) {
578 pthread_mutex_unlock(&load_hash[i].lock);
579 pthread_mutex_destroy(&load_hash[i].lock);
580 pthread_rwlock_unlock(&load_hash[i].rilock);
581 pthread_rwlock_destroy(&load_hash[i].rilock);
582 pthread_rwlock_unlock(&load_hash[i].rdlock);
583 pthread_rwlock_destroy(&load_hash[i].rdlock);
584 continue;
585 }
586
587 for (f = load_hash[i].next; f;) {
588 free_disarm(f->cg);
589 p = f->next;
590 free_disarm(f);
591 f = p;
592 }
593
594 pthread_mutex_unlock(&load_hash[i].lock);
595 pthread_mutex_destroy(&load_hash[i].lock);
596 pthread_rwlock_unlock(&load_hash[i].rilock);
597 pthread_rwlock_destroy(&load_hash[i].rilock);
598 pthread_rwlock_unlock(&load_hash[i].rdlock);
599 pthread_rwlock_destroy(&load_hash[i].rdlock);
600 }
601}
602
603/* Return a positive number on success, return 0 on failure.*/
604pthread_t load_daemon(int load_use)
605{
606 int ret;
607 pthread_t pid;
608
609 ret = init_load();
b7604bf9
CB
610 if (ret == -1)
611 return log_error(0, "Initialize hash_table fails in load_daemon!");
612
1f5596dd
CB
613 ret = pthread_create(&pid, NULL, load_begin, NULL);
614 if (ret != 0) {
1f5596dd 615 load_free();
b7604bf9 616 return log_error(0, "Create pthread fails in load_daemon!");
1f5596dd 617 }
b7604bf9 618
1f5596dd
CB
619 /* use loadavg, here loadavg = 1*/
620 loadavg = load_use;
621 return pid;
622}
623
624/* Returns 0 on success. */
625int stop_load_daemon(pthread_t pid)
626{
627 int s;
628
629 /* Signal the thread to gracefully stop */
630 loadavg_stop = 1;
631
632 s = pthread_join(pid, NULL); /* Make sure sub thread has been canceled. */
b7604bf9
CB
633 if (s)
634 return log_error(-1, "stop_load_daemon error: failed to join");
1f5596dd
CB
635
636 load_free();
637 loadavg_stop = 0;
638
639 return 0;
640}