]> git.proxmox.com Git - mirror_lxcfs.git/blame - src/proc_loadavg.c
proc_loadavg: use must_* alloc helpers
[mirror_lxcfs.git] / src / proc_loadavg.c
CommitLineData
db0463bf 1/* SPDX-License-Identifier: LGPL-2.1+ */
1f5596dd
CB
2
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE
5#endif
6
7#ifndef FUSE_USE_VERSION
8#define FUSE_USE_VERSION 26
9#endif
10
11#define _FILE_OFFSET_BITS 64
12
13#define __STDC_FORMAT_MACROS
14#include <dirent.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <fuse.h>
18#include <inttypes.h>
19#include <libgen.h>
20#include <pthread.h>
21#include <sched.h>
22#include <stdarg.h>
23#include <stdbool.h>
24#include <stdint.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <time.h>
29#include <unistd.h>
30#include <wait.h>
31#include <linux/magic.h>
32#include <linux/sched.h>
33#include <sys/epoll.h>
34#include <sys/mman.h>
35#include <sys/mount.h>
36#include <sys/param.h>
37#include <sys/socket.h>
38#include <sys/syscall.h>
39#include <sys/sysinfo.h>
40#include <sys/vfs.h>
41
42#include "bindings.h"
43#include "config.h"
44#include "cgroup_fuse.h"
45#include "cgroups/cgroup.h"
46#include "cgroups/cgroup_utils.h"
47#include "memory_utils.h"
48#include "utils.h"
49
/*
 * This parameter is used for proc_loadavg_read().
 * 1 means use loadavg, 0 means not use.
 */
static int loadavg = 0;

/* Hash-table tunables. */
#define LOAD_SIZE 100 /* number of buckets in load_hash */
#define FLUSH_TIME 5 /* refresh interval in seconds (see load_begin()) */
#define DEPTH_DIR 3 /* maximum cgroup directory depth scanned per container */
/* Fixed-point constants for the loadavg calculation. */
#define FSHIFT 11 /* nr of bits of precision */
#define FIXED_1 (1 << FSHIFT) /* 1.0 as fixed-point */
#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
#define EXP_5 2014 /* 1/exp(5sec/5min) */
#define EXP_15 2037 /* 1/exp(5sec/15min) */
#define LOAD_INT(x) ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
/* Set to 1 to ask the load_begin() worker thread to exit gracefully. */
static volatile sig_atomic_t loadavg_stop = 0;
69
/* Per-cgroup load-tracking state, chained into one load_hash bucket. */
struct load_node {
	/* cgroup */
	char *cg;
	/* Load averages (1/5/15 min) in FSHIFT fixed-point */
	uint64_t avenrun[3];
	/* Tasks in R or D state at the last refresh (see refresh_load()) */
	unsigned int run_pid;
	/* Total tasks seen at the last refresh */
	unsigned int total_pid;
	/* Largest pid seen at the last refresh */
	unsigned int last_pid;
	/* The file descriptor of the mounted cgroup */
	int cfd;
	/* Bucket chain: successor, and address of the predecessor's pointer
	 * to this node — allows O(1) unlink in del_node(). */
	struct load_node *next;
	struct load_node **pre;
};
83
struct load_head {
	/*
	 * The lock is about insert load_node and refresh load_node. To the first
	 * load_node of each hash bucket, insert and refresh in this hash bucket is
	 * mutually exclusive.
	 */
	pthread_mutex_t lock;
	/*
	 * The rdlock is about read loadavg and delete load_node. To each hash
	 * bucket, read and delete is mutually exclusive. But at the same time, we
	 * allow paratactic read operation. This rdlock is at list level.
	 */
	pthread_rwlock_t rdlock;
	/*
	 * The rilock is about read loadavg and insert load_node. To the first
	 * load_node of each hash bucket, read and insert is mutually exclusive.
	 * But at the same time, we allow paratactic read operation.
	 */
	pthread_rwlock_t rilock;
	/* Head of this bucket's chain of load_nodes (NULL when empty). */
	struct load_node *next;
};
105
static struct load_head load_hash[LOAD_SIZE]; /* hash table: bucket index is calc_hash(cgroup) % LOAD_SIZE */
107
/*
 * locate_node() finds special node. Not return NULL means success.
 * It should be noted that rdlock isn't unlocked at the end of code
 * because this function is used to read special node. Delete is not
 * allowed before read has ended.
 * unlock rdlock only in proc_loadavg_read().
 */
static struct load_node *locate_node(char *cg, int locate)
{
	struct load_node *f = NULL;
	int i = 0;

	/* rilock keeps a concurrent insert from swapping the bucket head
	 * under us; rdlock keeps a concurrent delete from freeing a node
	 * while the caller is still reading it. */
	pthread_rwlock_rdlock(&load_hash[locate].rilock);
	pthread_rwlock_rdlock(&load_hash[locate].rdlock);
	if (load_hash[locate].next == NULL) {
		/* Empty bucket. rdlock is intentionally left held — the
		 * caller must release it (see comment above). */
		pthread_rwlock_unlock(&load_hash[locate].rilock);
		return f;
	}
	f = load_hash[locate].next;
	pthread_rwlock_unlock(&load_hash[locate].rilock);
	/* Walk the chain looking for an exact cgroup-path match. */
	while (f && ((i = strcmp(f->cg, cg)) != 0))
		f = f->next;
	return f;
}
132
133static void insert_node(struct load_node **n, int locate)
134{
135 struct load_node *f;
136
137 pthread_mutex_lock(&load_hash[locate].lock);
138 pthread_rwlock_wrlock(&load_hash[locate].rilock);
139 f = load_hash[locate].next;
140 load_hash[locate].next = *n;
141
142 (*n)->pre = &(load_hash[locate].next);
143 if (f)
144 f->pre = &((*n)->next);
145 (*n)->next = f;
146 pthread_mutex_unlock(&load_hash[locate].lock);
147 pthread_rwlock_unlock(&load_hash[locate].rilock);
148}
149
4ec5c9da 150int calc_hash(const char *name)
1f5596dd
CB
151{
152 unsigned int hash = 0;
153 unsigned int x = 0;
b7604bf9 154
1f5596dd
CB
155 /* ELFHash algorithm. */
156 while (*name) {
157 hash = (hash << 4) + *name++;
158 x = hash & 0xf0000000;
159 if (x != 0)
160 hash ^= (x >> 24);
161 hash &= ~x;
162 }
b7604bf9 163
1f5596dd
CB
164 return (hash & 0x7fffffff);
165}
166
167int proc_loadavg_read(char *buf, size_t size, off_t offset,
168 struct fuse_file_info *fi)
169{
b7604bf9 170 __do_free char *cg = NULL;
1f5596dd 171 struct fuse_context *fc = fuse_get_context();
99b183fb 172 struct file_info *d = INTTYPE_TO_PTR(fi->fh);
1f5596dd 173 pid_t initpid;
4f18a602 174 ssize_t total_len = 0;
1f5596dd
CB
175 char *cache = d->buf;
176 struct load_node *n;
177 int hash;
b7604bf9 178 int cfd;
1ba088ae 179 uint64_t a, b, c;
1f5596dd
CB
180
181 if (offset) {
182 int left;
183
184 if (offset > d->size)
185 return -EINVAL;
186
187 if (!d->cached)
188 return 0;
189
190 left = d->size - offset;
191 total_len = left > size ? size : left;
192 memcpy(buf, cache + offset, total_len);
193
194 return total_len;
195 }
196 if (!loadavg)
197 return read_file_fuse("/proc/loadavg", buf, size, d);
198
199 initpid = lookup_initpid_in_store(fc->pid);
200 if (initpid <= 1 || is_shared_pidns(initpid))
201 initpid = fc->pid;
202
203 cg = get_pid_cgroup(initpid, "cpu");
204 if (!cg)
205 return read_file_fuse("/proc/loadavg", buf, size, d);
206
207 prune_init_slice(cg);
208 hash = calc_hash(cg) % LOAD_SIZE;
209 n = locate_node(cg, hash);
210
211 /* First time */
212 if (n == NULL) {
213 cfd = get_cgroup_fd("cpu");
3fe133df 214 if (cfd < 0) {
1f5596dd
CB
215 /*
216 * In locate_node() above, pthread_rwlock_unlock() isn't used
217 * because delete is not allowed before read has ended.
218 */
219 pthread_rwlock_unlock(&load_hash[hash].rdlock);
3fe133df 220 return read_file_fuse("/proc/loadavg", buf, size, d);
1f5596dd 221 }
b7604bf9 222
cb4bf06b
CB
223 n = must_realloc(NULL, sizeof(struct load_node));
224 n->cg = must_copy_string(cg);
1f5596dd
CB
225 n->avenrun[0] = 0;
226 n->avenrun[1] = 0;
227 n->avenrun[2] = 0;
228 n->run_pid = 0;
229 n->total_pid = 1;
230 n->last_pid = initpid;
231 n->cfd = cfd;
232 insert_node(&n, hash);
233 }
3fe133df
CB
234 a = n->avenrun[0] + (FIXED_1 / 200);
235 b = n->avenrun[1] + (FIXED_1 / 200);
236 c = n->avenrun[2] + (FIXED_1 / 200);
b7604bf9
CB
237 total_len = snprintf(d->buf, d->buflen,
238 "%lu.%02lu "
239 "%lu.%02lu "
240 "%lu.%02lu "
241 "%d/"
3fe133df 242 "%d "
b7604bf9
CB
243 "%d\n",
244 LOAD_INT(a),
245 LOAD_FRAC(a),
246 LOAD_INT(b),
247 LOAD_FRAC(b),
248 LOAD_INT(c),
249 LOAD_FRAC(c),
250 n->run_pid,
251 n->total_pid,
252 n->last_pid);
1f5596dd 253 pthread_rwlock_unlock(&load_hash[hash].rdlock);
b7604bf9
CB
254 if (total_len < 0 || total_len >= d->buflen)
255 return log_error(0, "Failed to write to cache");
256
1f5596dd
CB
257 d->size = (int)total_len;
258 d->cached = 1;
259
260 if (total_len > size)
261 total_len = size;
1f5596dd 262
b7604bf9
CB
263 memcpy(buf, d->buf, total_len);
264 return total_len;
1f5596dd
CB
265}
266
/*
 * Find the process pid from cgroup path.
 * eg:from /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find the process pid.
 * @pid_buf : put pid to pid_buf.
 * @dpath : the path of cgroup. eg: /docker/containerid or /docker/containerid/child-cgroup ...
 * @depth : the depth of cgroup in container.
 * @sum : return the number of pid.
 * @cfd : the file descriptor of the mounted cgroup. eg: /sys/fs/cgroup/cpu
 *
 * Recurses into subdirectories up to @depth levels, then appends one
 * heap-allocated pid string (newline still attached) per line of
 * cgroup.procs to *pid_buf. Errors are non-fatal: the current count is
 * simply returned.
 */
static int calc_pid(char ***pid_buf, const char *rel_path, int depth, int sum, int cfd)
{
	__do_free char *line = NULL, *path = NULL;
	__do_free void *fdopen_cache = NULL;
	__do_close int fd = -EBADF;
	__do_fclose FILE *f = NULL;
	__do_closedir DIR *dir = NULL;
	struct dirent *file;
	size_t linelen = 0;
	int pd;

	fd = openat(cfd, rel_path, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return sum;

	dir = fdopendir(fd);
	if (!dir)
		return sum;
	/* Transfer ownership to fdopendir(). */
	move_fd(fd);

	/* Recurse into child cgroups first, bounded by depth. */
	while (((file = readdir(dir)) != NULL) && depth > 0) {
		if (strcmp(file->d_name, ".") == 0)
			continue;

		if (strcmp(file->d_name, "..") == 0)
			continue;

		if (file->d_type == DT_DIR) {
			__do_free char *path_next = NULL;
			path_next = must_make_path(rel_path, "/", file->d_name, NULL);
			pd = depth - 1;
			sum = calc_pid(pid_buf, path_next, pd, sum, cfd);
		}
	}

	/* Collect the pids listed in this cgroup's cgroup.procs. */
	path = must_make_path(rel_path, "/cgroup.procs", NULL);
	fd = openat(cfd, path, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return sum;

	/* NOTE(review): fdopen_cached presumably wraps fdopen() with a
	 * cached buffer owned by fdopen_cache — confirm in utils. */
	f = fdopen_cached(fd, "re", &fdopen_cache);
	if (!f)
		return sum;

	while (getline(&line, &linelen, f) != -1) {
		__do_free char *task_pid = NULL;
		char **pid;

		task_pid = strdup(line);
		if (!task_pid)
			return sum;

		/* Grow the caller's array by one slot; on failure keep what
		 * we have collected so far. */
		pid = realloc(*pid_buf, sizeof(char *) * (sum + 1));
		if (!pid)
			return sum;
		*pid_buf = pid;
		*(*pid_buf + sum) = move_ptr(task_pid);
		sum++;
	}

	return sum;
}
339
340/*
341 * calc_load calculates the load according to the following formula:
342 * load1 = load0 * exp + active * (1 - exp)
343 *
344 * @load1: the new loadavg.
345 * @load0: the former loadavg.
346 * @active: the total number of running pid at this moment.
347 * @exp: the fixed-point defined in the beginning.
348 */
1ba088ae 349static uint64_t calc_load(uint64_t load, uint64_t exp, uint64_t active)
1f5596dd 350{
1ba088ae 351 uint64_t newload;
1f5596dd
CB
352
353 active = active > 0 ? active * FIXED_1 : 0;
354 newload = load * exp + active * (FIXED_1 - exp);
355 if (active >= load)
356 newload += FIXED_1 - 1;
357
358 return newload / FIXED_1;
359}
360
361/*
362 * Return 0 means that container p->cg is closed.
363 * Return -1 means that error occurred in refresh.
364 * Positive num equals the total number of pid.
365 */
3fe133df 366static int refresh_load(struct load_node *p, const char *path)
1f5596dd 367{
bef38939 368 char **idbuf = NULL;
3fe133df
CB
369 char proc_path[STRLITERALLEN("/proc//task//status") +
370 2 * INTTYPE_TO_STRLEN(pid_t) + 1];
1f5596dd
CB
371 int i, ret, run_pid = 0, total_pid = 0, last_pid = 0;
372 size_t linelen = 0;
373 int sum, length;
374 struct dirent *file;
375
3fe133df 376 idbuf = must_realloc(NULL, sizeof(char **));
1f5596dd
CB
377
378 sum = calc_pid(&idbuf, path, DEPTH_DIR, 0, p->cfd);
3fe133df 379 if (!sum)
1f5596dd
CB
380 goto out;
381
382 for (i = 0; i < sum; i++) {
383 __do_closedir DIR *dp = NULL;
384
acff9786 385 length = strlen(idbuf[i]) - 1;
1f5596dd 386 idbuf[i][length] = '\0';
3fe133df
CB
387
388 ret = snprintf(proc_path, sizeof(proc_path), "/proc/%s/task", idbuf[i]);
389 if (ret < 0 || (size_t)ret > sizeof(proc_path)) {
1f5596dd
CB
390 i = sum;
391 sum = -1;
3fe133df
CB
392 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
393 goto err_out;
1f5596dd
CB
394 }
395
396 dp = opendir(proc_path);
3fe133df
CB
397 if (!dp) {
398 lxcfs_error("Failed to open \"%s\"", proc_path);
399 continue;
400 }
b7604bf9 401
1f5596dd 402 while ((file = readdir(dp)) != NULL) {
c0e081ce 403 __do_free char *line = NULL;
1f5596dd
CB
404 __do_fclose FILE *f = NULL;
405
3fe133df 406 if (strcmp(file->d_name, ".") == 0)
1f5596dd 407 continue;
b7604bf9 408
3fe133df 409 if (strcmp(file->d_name, "..") == 0)
1f5596dd 410 continue;
b7604bf9 411
1f5596dd 412 total_pid++;
b7604bf9 413
3fe133df 414 /* We make the biggest pid become last_pid. */
1f5596dd
CB
415 ret = atof(file->d_name);
416 last_pid = (ret > last_pid) ? ret : last_pid;
417
3fe133df
CB
418 ret = snprintf(proc_path, sizeof(proc_path),
419 "/proc/%s/task/%s/status", idbuf[i], file->d_name);
420 if (ret < 0 || (size_t)ret > sizeof(proc_path)) {
1f5596dd
CB
421 i = sum;
422 sum = -1;
3fe133df
CB
423 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
424 goto err_out;
1f5596dd
CB
425 }
426
3fe133df
CB
427 f = fopen(proc_path, "re");
428 if (!f)
429 continue;
430
431 while (getline(&line, &linelen, f) != -1)
432 if ((line[0] == 'S') && (line[1] == 't'))
acff9786 433 break;
3fe133df
CB
434
435 if ((line[7] == 'R') || (line[7] == 'D'))
436 run_pid++;
1f5596dd
CB
437 }
438 }
3fe133df
CB
439
440 /* Calculate the loadavg. */
441 p->avenrun[0] = calc_load(p->avenrun[0], EXP_1, run_pid);
442 p->avenrun[1] = calc_load(p->avenrun[1], EXP_5, run_pid);
443 p->avenrun[2] = calc_load(p->avenrun[2], EXP_15, run_pid);
444 p->run_pid = run_pid;
445 p->total_pid = total_pid;
446 p->last_pid = last_pid;
1f5596dd
CB
447
448err_out:
449 for (; i > 0; i--)
b7604bf9 450 free(idbuf[i - 1]);
1f5596dd
CB
451out:
452 free(idbuf);
453 return sum;
454}
455
456/* Delete the load_node n and return the next node of it. */
457static struct load_node *del_node(struct load_node *n, int locate)
458{
459 struct load_node *g;
460
461 pthread_rwlock_wrlock(&load_hash[locate].rdlock);
462 if (n->next == NULL) {
463 *(n->pre) = NULL;
464 } else {
465 *(n->pre) = n->next;
466 n->next->pre = n->pre;
467 }
468 g = n->next;
469 free_disarm(n->cg);
470 free_disarm(n);
471 pthread_rwlock_unlock(&load_hash[locate].rdlock);
472 return g;
473}
474
475/*
476 * Traverse the hash table and update it.
477 */
478static void *load_begin(void *arg)
479{
480
2a5d697d 481 int first_node, sum;
1f5596dd 482 struct load_node *f;
1f5596dd
CB
483 clock_t time1, time2;
484
3fe133df 485 for (;;) {
1f5596dd
CB
486 if (loadavg_stop == 1)
487 return NULL;
488
489 time1 = clock();
3fe133df 490 for (int i = 0; i < LOAD_SIZE; i++) {
1f5596dd
CB
491 pthread_mutex_lock(&load_hash[i].lock);
492 if (load_hash[i].next == NULL) {
493 pthread_mutex_unlock(&load_hash[i].lock);
494 continue;
495 }
3fe133df 496
1f5596dd
CB
497 f = load_hash[i].next;
498 first_node = 1;
499 while (f) {
500 __do_free char *path = NULL;
501
2a5d697d 502 path = must_make_path_relative(f->cg, NULL);
1f5596dd
CB
503
504 sum = refresh_load(f, path);
505 if (sum == 0)
506 f = del_node(f, i);
507 else
2a5d697d
CB
508 f = f->next;
509
1f5596dd
CB
510 /* load_hash[i].lock locks only on the first node.*/
511 if (first_node == 1) {
512 first_node = 0;
513 pthread_mutex_unlock(&load_hash[i].lock);
514 }
515 }
516 }
517
518 if (loadavg_stop == 1)
519 return NULL;
520
521 time2 = clock();
2a5d697d
CB
522 usleep(FLUSH_TIME * 1000000 -
523 (int)((time2 - time1) * 1000000 / CLOCKS_PER_SEC));
1f5596dd
CB
524 }
525}
526
527/*
528 * init_load initialize the hash table.
529 * Return 0 on success, return -1 on failure.
530 */
531static int init_load(void)
532{
533 int i;
534 int ret;
535
536 for (i = 0; i < LOAD_SIZE; i++) {
537 load_hash[i].next = NULL;
538 ret = pthread_mutex_init(&load_hash[i].lock, NULL);
b7604bf9
CB
539 if (ret) {
540 lxcfs_error("Failed to initialize lock");
1f5596dd
CB
541 goto out3;
542 }
b7604bf9 543
1f5596dd 544 ret = pthread_rwlock_init(&load_hash[i].rdlock, NULL);
b7604bf9
CB
545 if (ret) {
546 lxcfs_error("Failed to initialize rdlock");
1f5596dd
CB
547 goto out2;
548 }
b7604bf9 549
1f5596dd 550 ret = pthread_rwlock_init(&load_hash[i].rilock, NULL);
b7604bf9
CB
551 if (ret) {
552 lxcfs_error("Failed to initialize rilock");
1f5596dd
CB
553 goto out1;
554 }
555 }
b7604bf9 556
1f5596dd 557 return 0;
b7604bf9 558
1f5596dd
CB
559out1:
560 pthread_rwlock_destroy(&load_hash[i].rdlock);
561out2:
562 pthread_mutex_destroy(&load_hash[i].lock);
563out3:
564 while (i > 0) {
565 i--;
566 pthread_mutex_destroy(&load_hash[i].lock);
567 pthread_rwlock_destroy(&load_hash[i].rdlock);
568 pthread_rwlock_destroy(&load_hash[i].rilock);
569 }
b7604bf9 570
1f5596dd
CB
571 return -1;
572}
573
574static void load_free(void)
575{
576 struct load_node *f, *p;
577
578 for (int i = 0; i < LOAD_SIZE; i++) {
579 pthread_mutex_lock(&load_hash[i].lock);
580 pthread_rwlock_wrlock(&load_hash[i].rilock);
581 pthread_rwlock_wrlock(&load_hash[i].rdlock);
582 if (load_hash[i].next == NULL) {
583 pthread_mutex_unlock(&load_hash[i].lock);
584 pthread_mutex_destroy(&load_hash[i].lock);
585 pthread_rwlock_unlock(&load_hash[i].rilock);
586 pthread_rwlock_destroy(&load_hash[i].rilock);
587 pthread_rwlock_unlock(&load_hash[i].rdlock);
588 pthread_rwlock_destroy(&load_hash[i].rdlock);
589 continue;
590 }
591
592 for (f = load_hash[i].next; f;) {
593 free_disarm(f->cg);
594 p = f->next;
595 free_disarm(f);
596 f = p;
597 }
598
599 pthread_mutex_unlock(&load_hash[i].lock);
600 pthread_mutex_destroy(&load_hash[i].lock);
601 pthread_rwlock_unlock(&load_hash[i].rilock);
602 pthread_rwlock_destroy(&load_hash[i].rilock);
603 pthread_rwlock_unlock(&load_hash[i].rdlock);
604 pthread_rwlock_destroy(&load_hash[i].rdlock);
605 }
606}
607
608/* Return a positive number on success, return 0 on failure.*/
609pthread_t load_daemon(int load_use)
610{
611 int ret;
612 pthread_t pid;
613
614 ret = init_load();
b7604bf9
CB
615 if (ret == -1)
616 return log_error(0, "Initialize hash_table fails in load_daemon!");
617
1f5596dd
CB
618 ret = pthread_create(&pid, NULL, load_begin, NULL);
619 if (ret != 0) {
1f5596dd 620 load_free();
b7604bf9 621 return log_error(0, "Create pthread fails in load_daemon!");
1f5596dd 622 }
b7604bf9 623
1f5596dd
CB
624 /* use loadavg, here loadavg = 1*/
625 loadavg = load_use;
626 return pid;
627}
628
629/* Returns 0 on success. */
630int stop_load_daemon(pthread_t pid)
631{
632 int s;
633
634 /* Signal the thread to gracefully stop */
635 loadavg_stop = 1;
636
637 s = pthread_join(pid, NULL); /* Make sure sub thread has been canceled. */
b7604bf9
CB
638 if (s)
639 return log_error(-1, "stop_load_daemon error: failed to join");
1f5596dd
CB
640
641 load_free();
642 loadavg_stop = 0;
643
644 return 0;
645}