]> git.proxmox.com Git - mirror_lxcfs.git/blame - src/proc_loadavg.c
tree-wide: set _GNU_SOURCE in meson.build
[mirror_lxcfs.git] / src / proc_loadavg.c
CommitLineData
db0463bf 1/* SPDX-License-Identifier: LGPL-2.1+ */
1f5596dd 2
f834b6bf
SP
3#include "config.h"
4
1f5596dd
CB
5#include <dirent.h>
6#include <errno.h>
7#include <fcntl.h>
1f5596dd
CB
8#include <inttypes.h>
9#include <libgen.h>
10#include <pthread.h>
11#include <sched.h>
12#include <stdarg.h>
13#include <stdbool.h>
14#include <stdint.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18#include <time.h>
19#include <unistd.h>
20#include <wait.h>
21#include <linux/magic.h>
22#include <linux/sched.h>
23#include <sys/epoll.h>
24#include <sys/mman.h>
25#include <sys/mount.h>
26#include <sys/param.h>
27#include <sys/socket.h>
28#include <sys/syscall.h>
29#include <sys/sysinfo.h>
30#include <sys/vfs.h>
31
e01afbb7
CB
32#include "proc_loadavg.h"
33
1f5596dd 34#include "bindings.h"
1f5596dd
CB
35#include "cgroup_fuse.h"
36#include "cgroups/cgroup.h"
37#include "cgroups/cgroup_utils.h"
38#include "memory_utils.h"
39#include "utils.h"
40
/*
 * Runtime switch consulted by proc_loadavg_read(): when it is zero the
 * host's /proc/loadavg is passed through, otherwise the per-cgroup values
 * kept in the hash table are reported. It is set by load_daemon().
 */
static int loadavg = 0;

/* Hash table and refresh tuning. */
#define LOAD_SIZE 100 /* number of buckets in load_hash */
#define FLUSH_TIME 5 /* seconds between two refresh passes */
#define DEPTH_DIR 3 /* how many cgroup levels calc_pid() may descend */

/* Fixed-point arithmetic used to compute the load average. */
#define FSHIFT 11 /* nr of bits of precision */
#define FIXED_1 (1 << FSHIFT) /* 1.0 as fixed-point */
#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
#define EXP_5 2014 /* 1/exp(5sec/5min) */
#define EXP_15 2037 /* 1/exp(5sec/15min) */
/* Integer part of a fixed-point value. */
#define LOAD_INT(x) ((x) >> FSHIFT)
/* Fractional part of a fixed-point value, scaled to hundredths. */
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

/* Set to 1 by stop_load_daemon() to ask load_begin() to return. */
static volatile sig_atomic_t loadavg_stop = 0;
60
/* One hash table entry: the load statistics kept for a single cgroup. */
struct load_node {
	/* cgroup path; the key compared by locate_node() */
	char *cg;
	/* Fixed-point load averages, updated with EXP_1, EXP_5 and EXP_15 */
	uint64_t avenrun[3];
	/* Tasks found in state 'R' or 'D' during the last refresh */
	unsigned int run_pid;
	/* Tasks counted during the last refresh */
	unsigned int total_pid;
	/* Largest task id seen during the last refresh */
	unsigned int last_pid;
	/* The file descriptor of the mounted cgroup */
	int cfd;
	/* Next node in the same bucket, NULL at the tail */
	struct load_node *next;
	/*
	 * Address of the pointer that points at this node: the bucket
	 * head's next field or the previous node's next field.
	 */
	struct load_node **pre;
};
74
75struct load_head {
76 /*
77 * The lock is about insert load_node and refresh load_node.To the first
78 * load_node of each hash bucket, insert and refresh in this hash bucket is
79 * mutually exclusive.
80 */
81 pthread_mutex_t lock;
82 /*
83 * The rdlock is about read loadavg and delete load_node.To each hash
84 * bucket, read and delete is mutually exclusive. But at the same time, we
85 * allow paratactic read operation. This rdlock is at list level.
86 */
87 pthread_rwlock_t rdlock;
88 /*
89 * The rilock is about read loadavg and insert load_node.To the first
90 * load_node of each hash bucket, read and insert is mutually exclusive.
91 * But at the same time, we allow paratactic read operation.
92 */
93 pthread_rwlock_t rilock;
94 struct load_node *next;
95};
96
97static struct load_head load_hash[LOAD_SIZE]; /* hash table */
98
99/*
100 * locate_node() finds special node. Not return NULL means success.
101 * It should be noted that rdlock isn't unlocked at the end of code
102 * because this function is used to read special node. Delete is not
103 * allowed before read has ended.
104 * unlock rdlock only in proc_loadavg_read().
105 */
106static struct load_node *locate_node(char *cg, int locate)
107{
108 struct load_node *f = NULL;
109 int i = 0;
110
111 pthread_rwlock_rdlock(&load_hash[locate].rilock);
112 pthread_rwlock_rdlock(&load_hash[locate].rdlock);
113 if (load_hash[locate].next == NULL) {
114 pthread_rwlock_unlock(&load_hash[locate].rilock);
115 return f;
116 }
117 f = load_hash[locate].next;
118 pthread_rwlock_unlock(&load_hash[locate].rilock);
119 while (f && ((i = strcmp(f->cg, cg)) != 0))
120 f = f->next;
121 return f;
122}
123
124static void insert_node(struct load_node **n, int locate)
125{
126 struct load_node *f;
127
128 pthread_mutex_lock(&load_hash[locate].lock);
129 pthread_rwlock_wrlock(&load_hash[locate].rilock);
130 f = load_hash[locate].next;
131 load_hash[locate].next = *n;
132
133 (*n)->pre = &(load_hash[locate].next);
134 if (f)
135 f->pre = &((*n)->next);
136 (*n)->next = f;
137 pthread_mutex_unlock(&load_hash[locate].lock);
138 pthread_rwlock_unlock(&load_hash[locate].rilock);
139}
140
/*
 * calc_hash() hashes the NUL-terminated string @name with the ELFHash
 * algorithm and returns the result with the sign bit cleared, so the
 * value is never negative.
 *
 * The bytes are read as unsigned char: whether plain char is signed is
 * implementation-defined, and a sign-extended byte >= 0x80 would make the
 * hash depend on the platform.
 */
int calc_hash(const char *name)
{
	const unsigned char *p = (const unsigned char *)name;
	unsigned int hash = 0;

	/* ELFHash algorithm. */
	while (*p) {
		unsigned int high;

		hash = (hash << 4) + *p++;
		/* Fold the top nibble back into the low bits, then clear it. */
		high = hash & 0xf0000000;
		if (high != 0)
			hash ^= (high >> 24);
		hash &= ~high;
	}

	return (hash & 0x7fffffff);
}
157
158int proc_loadavg_read(char *buf, size_t size, off_t offset,
159 struct fuse_file_info *fi)
160{
b7604bf9 161 __do_free char *cg = NULL;
1f5596dd 162 struct fuse_context *fc = fuse_get_context();
99b183fb 163 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1f5596dd 164 pid_t initpid;
4f18a602 165 ssize_t total_len = 0;
1f5596dd
CB
166 struct load_node *n;
167 int hash;
b7604bf9 168 int cfd;
1ba088ae 169 uint64_t a, b, c;
1f5596dd
CB
170
171 if (offset) {
3cf1e562 172 size_t left;
1f5596dd
CB
173
174 if (offset > d->size)
175 return -EINVAL;
176
177 if (!d->cached)
178 return 0;
179
180 left = d->size - offset;
181 total_len = left > size ? size : left;
cde2554c 182 memcpy(buf, d->buf + offset, total_len);
1f5596dd
CB
183
184 return total_len;
185 }
186 if (!loadavg)
187 return read_file_fuse("/proc/loadavg", buf, size, d);
188
189 initpid = lookup_initpid_in_store(fc->pid);
190 if (initpid <= 1 || is_shared_pidns(initpid))
191 initpid = fc->pid;
192
193 cg = get_pid_cgroup(initpid, "cpu");
194 if (!cg)
195 return read_file_fuse("/proc/loadavg", buf, size, d);
196
197 prune_init_slice(cg);
198 hash = calc_hash(cg) % LOAD_SIZE;
199 n = locate_node(cg, hash);
200
201 /* First time */
202 if (n == NULL) {
203 cfd = get_cgroup_fd("cpu");
3fe133df 204 if (cfd < 0) {
1f5596dd
CB
205 /*
206 * In locate_node() above, pthread_rwlock_unlock() isn't used
207 * because delete is not allowed before read has ended.
208 */
209 pthread_rwlock_unlock(&load_hash[hash].rdlock);
3fe133df 210 return read_file_fuse("/proc/loadavg", buf, size, d);
1f5596dd 211 }
b7604bf9 212
cb4bf06b 213 n = must_realloc(NULL, sizeof(struct load_node));
70f7563e 214 n->cg = move_ptr(cg);
1f5596dd
CB
215 n->avenrun[0] = 0;
216 n->avenrun[1] = 0;
217 n->avenrun[2] = 0;
218 n->run_pid = 0;
219 n->total_pid = 1;
220 n->last_pid = initpid;
221 n->cfd = cfd;
222 insert_node(&n, hash);
223 }
3fe133df
CB
224 a = n->avenrun[0] + (FIXED_1 / 200);
225 b = n->avenrun[1] + (FIXED_1 / 200);
226 c = n->avenrun[2] + (FIXED_1 / 200);
b7604bf9
CB
227 total_len = snprintf(d->buf, d->buflen,
228 "%lu.%02lu "
229 "%lu.%02lu "
230 "%lu.%02lu "
231 "%d/"
3fe133df 232 "%d "
b7604bf9
CB
233 "%d\n",
234 LOAD_INT(a),
235 LOAD_FRAC(a),
236 LOAD_INT(b),
237 LOAD_FRAC(b),
238 LOAD_INT(c),
239 LOAD_FRAC(c),
240 n->run_pid,
241 n->total_pid,
242 n->last_pid);
1f5596dd 243 pthread_rwlock_unlock(&load_hash[hash].rdlock);
b7604bf9
CB
244 if (total_len < 0 || total_len >= d->buflen)
245 return log_error(0, "Failed to write to cache");
246
1f5596dd
CB
247 d->size = (int)total_len;
248 d->cached = 1;
249
3cf1e562 250 if ((size_t)total_len > size)
1f5596dd 251 total_len = size;
1f5596dd 252
b7604bf9
CB
253 memcpy(buf, d->buf, total_len);
254 return total_len;
1f5596dd
CB
255}
256
257/*
258 * Find the process pid from cgroup path.
259 * eg:from /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find the process pid.
260 * @pid_buf : put pid to pid_buf.
261 * @dpath : the path of cgroup. eg: /docker/containerid or /docker/containerid/child-cgroup ...
262 * @depth : the depth of cgroup in container.
263 * @sum : return the number of pid.
264 * @cfd : the file descriptor of the mounted cgroup. eg: /sys/fs/cgroup/cpu
265 */
e771a80b 266static int calc_pid(char ***pid_buf, const char *rel_path, int depth, int sum, int cfd)
1f5596dd 267{
61ef3c5c 268 __do_free char *line = NULL, *path = NULL;
9b817e41 269 __do_free void *fdopen_cache = NULL;
05b7a16d 270 __do_close int fd = -EBADF;
1f5596dd
CB
271 __do_fclose FILE *f = NULL;
272 __do_closedir DIR *dir = NULL;
273 struct dirent *file;
274 size_t linelen = 0;
1f5596dd 275 int pd;
1f5596dd 276
e771a80b 277 fd = openat(cfd, rel_path, O_RDONLY | O_CLOEXEC);
1f5596dd
CB
278 if (fd < 0)
279 return sum;
280
3fe133df 281 dir = fdopendir(fd);
1f5596dd
CB
282 if (!dir)
283 return sum;
3fe133df
CB
284 /* Transfer ownership to fdopendir(). */
285 move_fd(fd);
1f5596dd
CB
286
287 while (((file = readdir(dir)) != NULL) && depth > 0) {
288 if (strcmp(file->d_name, ".") == 0)
289 continue;
290
291 if (strcmp(file->d_name, "..") == 0)
292 continue;
293
294 if (file->d_type == DT_DIR) {
e771a80b
CB
295 __do_free char *path_next = NULL;
296 path_next = must_make_path(rel_path, "/", file->d_name, NULL);
1f5596dd 297 pd = depth - 1;
e771a80b 298 sum = calc_pid(pid_buf, path_next, pd, sum, cfd);
1f5596dd
CB
299 }
300 }
301
e771a80b 302 path = must_make_path(rel_path, "/cgroup.procs", NULL);
3fe133df 303 fd = openat(cfd, path, O_RDONLY | O_CLOEXEC);
1f5596dd
CB
304 if (fd < 0)
305 return sum;
306
9b817e41 307 f = fdopen_cached(fd, "re", &fdopen_cache);
1f5596dd
CB
308 if (!f)
309 return sum;
310
311 while (getline(&line, &linelen, f) != -1) {
1f342679
CB
312 __do_free char *task_pid = NULL;
313 char **pid;
314
315 task_pid = strdup(line);
316 if (!task_pid)
317 return sum;
318
1f5596dd
CB
319 pid = realloc(*pid_buf, sizeof(char *) * (sum + 1));
320 if (!pid)
321 return sum;
322 *pid_buf = pid;
1f342679 323 *(*pid_buf + sum) = move_ptr(task_pid);
1f5596dd
CB
324 sum++;
325 }
326
327 return sum;
328}
329
330/*
331 * calc_load calculates the load according to the following formula:
332 * load1 = load0 * exp + active * (1 - exp)
333 *
334 * @load1: the new loadavg.
335 * @load0: the former loadavg.
336 * @active: the total number of running pid at this moment.
337 * @exp: the fixed-point defined in the beginning.
338 */
1ba088ae 339static uint64_t calc_load(uint64_t load, uint64_t exp, uint64_t active)
1f5596dd 340{
1ba088ae 341 uint64_t newload;
1f5596dd
CB
342
343 active = active > 0 ? active * FIXED_1 : 0;
344 newload = load * exp + active * (FIXED_1 - exp);
345 if (active >= load)
346 newload += FIXED_1 - 1;
347
348 return newload / FIXED_1;
349}
350
351/*
352 * Return 0 means that container p->cg is closed.
353 * Return -1 means that error occurred in refresh.
354 * Positive num equals the total number of pid.
355 */
3fe133df 356static int refresh_load(struct load_node *p, const char *path)
1f5596dd 357{
bef38939 358 char **idbuf = NULL;
3fe133df
CB
359 char proc_path[STRLITERALLEN("/proc//task//status") +
360 2 * INTTYPE_TO_STRLEN(pid_t) + 1];
1f5596dd
CB
361 int i, ret, run_pid = 0, total_pid = 0, last_pid = 0;
362 size_t linelen = 0;
363 int sum, length;
364 struct dirent *file;
365
3fe133df 366 idbuf = must_realloc(NULL, sizeof(char **));
1f5596dd
CB
367
368 sum = calc_pid(&idbuf, path, DEPTH_DIR, 0, p->cfd);
3fe133df 369 if (!sum)
1f5596dd
CB
370 goto out;
371
372 for (i = 0; i < sum; i++) {
373 __do_closedir DIR *dp = NULL;
374
acff9786 375 length = strlen(idbuf[i]) - 1;
1f5596dd 376 idbuf[i][length] = '\0';
3fe133df
CB
377
378 ret = snprintf(proc_path, sizeof(proc_path), "/proc/%s/task", idbuf[i]);
379 if (ret < 0 || (size_t)ret > sizeof(proc_path)) {
1f5596dd
CB
380 i = sum;
381 sum = -1;
3fe133df
CB
382 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
383 goto err_out;
1f5596dd
CB
384 }
385
386 dp = opendir(proc_path);
3fe133df
CB
387 if (!dp) {
388 lxcfs_error("Failed to open \"%s\"", proc_path);
389 continue;
390 }
b7604bf9 391
1f5596dd 392 while ((file = readdir(dp)) != NULL) {
c0e081ce 393 __do_free char *line = NULL;
1f5596dd
CB
394 __do_fclose FILE *f = NULL;
395
3fe133df 396 if (strcmp(file->d_name, ".") == 0)
1f5596dd 397 continue;
b7604bf9 398
3fe133df 399 if (strcmp(file->d_name, "..") == 0)
1f5596dd 400 continue;
b7604bf9 401
1f5596dd 402 total_pid++;
b7604bf9 403
3fe133df 404 /* We make the biggest pid become last_pid. */
1f5596dd
CB
405 ret = atof(file->d_name);
406 last_pid = (ret > last_pid) ? ret : last_pid;
407
3fe133df
CB
408 ret = snprintf(proc_path, sizeof(proc_path),
409 "/proc/%s/task/%s/status", idbuf[i], file->d_name);
410 if (ret < 0 || (size_t)ret > sizeof(proc_path)) {
1f5596dd
CB
411 i = sum;
412 sum = -1;
3fe133df
CB
413 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
414 goto err_out;
1f5596dd
CB
415 }
416
3fe133df
CB
417 f = fopen(proc_path, "re");
418 if (!f)
419 continue;
420
421 while (getline(&line, &linelen, f) != -1)
422 if ((line[0] == 'S') && (line[1] == 't'))
acff9786 423 break;
3fe133df
CB
424
425 if ((line[7] == 'R') || (line[7] == 'D'))
426 run_pid++;
1f5596dd
CB
427 }
428 }
3fe133df
CB
429
430 /* Calculate the loadavg. */
431 p->avenrun[0] = calc_load(p->avenrun[0], EXP_1, run_pid);
432 p->avenrun[1] = calc_load(p->avenrun[1], EXP_5, run_pid);
433 p->avenrun[2] = calc_load(p->avenrun[2], EXP_15, run_pid);
434 p->run_pid = run_pid;
435 p->total_pid = total_pid;
436 p->last_pid = last_pid;
1f5596dd
CB
437
438err_out:
439 for (; i > 0; i--)
b7604bf9 440 free(idbuf[i - 1]);
1f5596dd
CB
441out:
442 free(idbuf);
443 return sum;
444}
445
446/* Delete the load_node n and return the next node of it. */
447static struct load_node *del_node(struct load_node *n, int locate)
448{
449 struct load_node *g;
450
451 pthread_rwlock_wrlock(&load_hash[locate].rdlock);
452 if (n->next == NULL) {
453 *(n->pre) = NULL;
454 } else {
455 *(n->pre) = n->next;
456 n->next->pre = n->pre;
457 }
458 g = n->next;
459 free_disarm(n->cg);
460 free_disarm(n);
461 pthread_rwlock_unlock(&load_hash[locate].rdlock);
462 return g;
463}
464
465/*
466 * Traverse the hash table and update it.
467 */
468static void *load_begin(void *arg)
469{
470
2a5d697d 471 int first_node, sum;
1f5596dd 472 struct load_node *f;
1f5596dd
CB
473 clock_t time1, time2;
474
3fe133df 475 for (;;) {
1f5596dd
CB
476 if (loadavg_stop == 1)
477 return NULL;
478
479 time1 = clock();
3fe133df 480 for (int i = 0; i < LOAD_SIZE; i++) {
1f5596dd
CB
481 pthread_mutex_lock(&load_hash[i].lock);
482 if (load_hash[i].next == NULL) {
483 pthread_mutex_unlock(&load_hash[i].lock);
484 continue;
485 }
3fe133df 486
1f5596dd
CB
487 f = load_hash[i].next;
488 first_node = 1;
489 while (f) {
490 __do_free char *path = NULL;
491
2a5d697d 492 path = must_make_path_relative(f->cg, NULL);
1f5596dd
CB
493
494 sum = refresh_load(f, path);
495 if (sum == 0)
496 f = del_node(f, i);
497 else
2a5d697d
CB
498 f = f->next;
499
1f5596dd
CB
500 /* load_hash[i].lock locks only on the first node.*/
501 if (first_node == 1) {
502 first_node = 0;
503 pthread_mutex_unlock(&load_hash[i].lock);
504 }
505 }
506 }
507
508 if (loadavg_stop == 1)
509 return NULL;
510
511 time2 = clock();
2a5d697d
CB
512 usleep(FLUSH_TIME * 1000000 -
513 (int)((time2 - time1) * 1000000 / CLOCKS_PER_SEC));
1f5596dd
CB
514 }
515}
516
517/*
518 * init_load initialize the hash table.
519 * Return 0 on success, return -1 on failure.
520 */
521static int init_load(void)
522{
523 int i;
524 int ret;
525
526 for (i = 0; i < LOAD_SIZE; i++) {
527 load_hash[i].next = NULL;
528 ret = pthread_mutex_init(&load_hash[i].lock, NULL);
b7604bf9
CB
529 if (ret) {
530 lxcfs_error("Failed to initialize lock");
1f5596dd
CB
531 goto out3;
532 }
b7604bf9 533
1f5596dd 534 ret = pthread_rwlock_init(&load_hash[i].rdlock, NULL);
b7604bf9
CB
535 if (ret) {
536 lxcfs_error("Failed to initialize rdlock");
1f5596dd
CB
537 goto out2;
538 }
b7604bf9 539
1f5596dd 540 ret = pthread_rwlock_init(&load_hash[i].rilock, NULL);
b7604bf9
CB
541 if (ret) {
542 lxcfs_error("Failed to initialize rilock");
1f5596dd
CB
543 goto out1;
544 }
545 }
b7604bf9 546
1f5596dd 547 return 0;
b7604bf9 548
1f5596dd
CB
549out1:
550 pthread_rwlock_destroy(&load_hash[i].rdlock);
551out2:
552 pthread_mutex_destroy(&load_hash[i].lock);
553out3:
554 while (i > 0) {
555 i--;
556 pthread_mutex_destroy(&load_hash[i].lock);
557 pthread_rwlock_destroy(&load_hash[i].rdlock);
558 pthread_rwlock_destroy(&load_hash[i].rilock);
559 }
b7604bf9 560
1f5596dd
CB
561 return -1;
562}
563
564static void load_free(void)
565{
566 struct load_node *f, *p;
567
568 for (int i = 0; i < LOAD_SIZE; i++) {
569 pthread_mutex_lock(&load_hash[i].lock);
570 pthread_rwlock_wrlock(&load_hash[i].rilock);
571 pthread_rwlock_wrlock(&load_hash[i].rdlock);
572 if (load_hash[i].next == NULL) {
573 pthread_mutex_unlock(&load_hash[i].lock);
574 pthread_mutex_destroy(&load_hash[i].lock);
575 pthread_rwlock_unlock(&load_hash[i].rilock);
576 pthread_rwlock_destroy(&load_hash[i].rilock);
577 pthread_rwlock_unlock(&load_hash[i].rdlock);
578 pthread_rwlock_destroy(&load_hash[i].rdlock);
579 continue;
580 }
581
582 for (f = load_hash[i].next; f;) {
583 free_disarm(f->cg);
584 p = f->next;
585 free_disarm(f);
586 f = p;
587 }
588
589 pthread_mutex_unlock(&load_hash[i].lock);
590 pthread_mutex_destroy(&load_hash[i].lock);
591 pthread_rwlock_unlock(&load_hash[i].rilock);
592 pthread_rwlock_destroy(&load_hash[i].rilock);
593 pthread_rwlock_unlock(&load_hash[i].rdlock);
594 pthread_rwlock_destroy(&load_hash[i].rdlock);
595 }
596}
597
598/* Return a positive number on success, return 0 on failure.*/
599pthread_t load_daemon(int load_use)
600{
601 int ret;
602 pthread_t pid;
603
604 ret = init_load();
b7604bf9
CB
605 if (ret == -1)
606 return log_error(0, "Initialize hash_table fails in load_daemon!");
607
1f5596dd
CB
608 ret = pthread_create(&pid, NULL, load_begin, NULL);
609 if (ret != 0) {
1f5596dd 610 load_free();
b7604bf9 611 return log_error(0, "Create pthread fails in load_daemon!");
1f5596dd 612 }
b7604bf9 613
1f5596dd
CB
614 /* use loadavg, here loadavg = 1*/
615 loadavg = load_use;
616 return pid;
617}
618
619/* Returns 0 on success. */
620int stop_load_daemon(pthread_t pid)
621{
622 int s;
623
624 /* Signal the thread to gracefully stop */
625 loadavg_stop = 1;
626
627 s = pthread_join(pid, NULL); /* Make sure sub thread has been canceled. */
b7604bf9
CB
628 if (s)
629 return log_error(-1, "stop_load_daemon error: failed to join");
1f5596dd
CB
630
631 load_free();
632 loadavg_stop = 0;
633
634 return 0;
635}