/* SPDX-License-Identifier: LGPL-2.1+ */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#ifndef FUSE_USE_VERSION
#define FUSE_USE_VERSION 26
#endif

#define _FILE_OFFSET_BITS 64

#define __STDC_FORMAT_MACROS
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <fuse.h>
#include <inttypes.h>
#include <libgen.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h> /* sig_atomic_t */
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <wait.h>
#include <linux/magic.h>
#include <linux/sched.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <sys/vfs.h>

#include "bindings.h"
#include "config.h"
#include "cgroup_fuse.h"
#include "cgroups/cgroup.h"
#include "cgroups/cgroup_utils.h"
#include "memory_utils.h"
#include "utils.h"

/*
 * This parameter is used by proc_loadavg_read().
 * 1 means the virtualized loadavg is used, 0 means it is not.
 */
static int loadavg = 0;

/* Hash table parameters. */
#define LOAD_SIZE 100 /* the number of hash buckets */
#define FLUSH_TIME 5 /* the refresh interval in seconds */
#define DEPTH_DIR 3 /* the per-container cgroup depth to scan */
/* Constants for calculating the loadavg. */
#define FSHIFT 11 /* nr of bits of precision */
#define FIXED_1 (1 << FSHIFT) /* 1.0 as fixed-point */
#define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
#define EXP_5 2014 /* 1/exp(5sec/5min) */
#define EXP_15 2037 /* 1/exp(5sec/15min) */
#define LOAD_INT(x) ((x) >> FSHIFT)
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
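/*
 * Worked example of the fixed-point format: FIXED_1 is 1 << 11 = 2048, so a
 * load of 0.50 is stored as 1024; LOAD_INT(1024) = 0 and LOAD_FRAC(1024) =
 * (1024 * 100) >> 11 = 50, printing as "0.50". The decay factors follow from
 * FIXED_1 / exp(interval/window), e.g. EXP_1 = 2048 / exp(5s/60s) ~= 1884.
 */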
static volatile sig_atomic_t loadavg_stop = 0;

struct load_node {
	/* cgroup */
	char *cg;
	/* Load averages */
	uint64_t avenrun[3];
	unsigned int run_pid;
	unsigned int total_pid;
	unsigned int last_pid;
	/* The file descriptor of the mounted cgroup */
	int cfd;
	struct load_node *next;
	struct load_node **pre;
};

struct load_head {
	/*
	 * This lock protects insertion and refresh of load_nodes. For the
	 * first load_node of each hash bucket, insert and refresh are
	 * mutually exclusive.
	 */
	pthread_mutex_t lock;
	/*
	 * The rdlock serializes reading the loadavg against deleting a
	 * load_node. For each hash bucket, read and delete are mutually
	 * exclusive, but concurrent readers are allowed. This rdlock is at
	 * list level.
	 */
	pthread_rwlock_t rdlock;
	/*
	 * The rilock serializes reading the loadavg against inserting a
	 * load_node. For the first load_node of each hash bucket, read and
	 * insert are mutually exclusive, but concurrent readers are allowed.
	 */
	pthread_rwlock_t rilock;
	struct load_node *next;
};
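
/*
 * Bucket layout: each bucket in load_hash chains load_nodes through next,
 * while pre points back at the location holding the current node, so
 * deletion needs no list walk:
 *
 *   load_hash[i].next -> node A -> node B -> NULL
 *   A->pre == &load_hash[i].next, B->pre == &A->next
 */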

static struct load_head load_hash[LOAD_SIZE]; /* hash table */

/*
 * locate_node() finds a given node; a non-NULL return means success.
 * Note that rdlock is deliberately left locked on return, because this
 * function is used to read the node and deletion must not happen before
 * the read has ended. rdlock is unlocked only in proc_loadavg_read().
 */
static struct load_node *locate_node(char *cg, int locate)
{
	struct load_node *f = NULL;
	int i = 0;

	pthread_rwlock_rdlock(&load_hash[locate].rilock);
	pthread_rwlock_rdlock(&load_hash[locate].rdlock);
	if (load_hash[locate].next == NULL) {
		pthread_rwlock_unlock(&load_hash[locate].rilock);
		return f;
	}
	f = load_hash[locate].next;
	pthread_rwlock_unlock(&load_hash[locate].rilock);
	while (f && ((i = strcmp(f->cg, cg)) != 0))
		f = f->next;
	return f;
}

static void insert_node(struct load_node **n, int locate)
{
	struct load_node *f;

	pthread_mutex_lock(&load_hash[locate].lock);
	pthread_rwlock_wrlock(&load_hash[locate].rilock);
	f = load_hash[locate].next;
	load_hash[locate].next = *n;

	(*n)->pre = &(load_hash[locate].next);
	if (f)
		f->pre = &((*n)->next);
	(*n)->next = f;
	pthread_mutex_unlock(&load_hash[locate].lock);
	pthread_rwlock_unlock(&load_hash[locate].rilock);
}

int calc_hash(const char *name)
{
	unsigned int hash = 0;
	unsigned int x = 0;

	/* ELFHash algorithm. */
	while (*name) {
		hash = (hash << 4) + *name++;
		x = hash & 0xf0000000;
		if (x != 0)
			hash ^= (x >> 24);
		hash &= ~x;
	}

	return (hash & 0x7fffffff);
}

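/*
 * Callers reduce the hash to a bucket index modulo LOAD_SIZE, e.g.
 * hash = calc_hash(cg) % LOAD_SIZE in proc_loadavg_read() below.
 */
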
int proc_loadavg_read(char *buf, size_t size, off_t offset,
		      struct fuse_file_info *fi)
{
	__do_free char *cg = NULL;
	struct fuse_context *fc = fuse_get_context();
	struct file_info *d = INTTYPE_TO_PTR(fi->fh);
	pid_t initpid;
	ssize_t total_len = 0;
	char *cache = d->buf;
	struct load_node *n;
	int hash;
	int cfd;
	uint64_t a, b, c;

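	/*
	 * Reads at a non-zero offset are served from the cache filled in by
	 * the first read at offset 0; FUSE may split one logical read of the
	 * file into several calls.
	 */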
	if (offset) {
		int left;

		if (offset > d->size)
			return -EINVAL;

		if (!d->cached)
			return 0;

		left = d->size - offset;
		total_len = left > size ? size : left;
		memcpy(buf, cache + offset, total_len);

		return total_len;
	}
	if (!loadavg)
		return read_file_fuse("/proc/loadavg", buf, size, d);

	initpid = lookup_initpid_in_store(fc->pid);
	if (initpid <= 1 || is_shared_pidns(initpid))
		initpid = fc->pid;

	cg = get_pid_cgroup(initpid, "cpu");
	if (!cg)
		return read_file_fuse("/proc/loadavg", buf, size, d);

	prune_init_slice(cg);
	hash = calc_hash(cg) % LOAD_SIZE;
	n = locate_node(cg, hash);

	/* First time */
	if (n == NULL) {
		cfd = get_cgroup_fd("cpu");
		if (cfd < 0) {
			/*
			 * locate_node() above left rdlock held because
			 * deletion must not happen before the read has
			 * ended; drop it here before bailing out.
			 */
			pthread_rwlock_unlock(&load_hash[hash].rdlock);
			return read_file_fuse("/proc/loadavg", buf, size, d);
		}

		do {
			n = malloc(sizeof(struct load_node));
		} while (!n);

		do {
			n->cg = malloc(strlen(cg) + 1);
		} while (!n->cg);

		strcpy(n->cg, cg);
		n->avenrun[0] = 0;
		n->avenrun[1] = 0;
		n->avenrun[2] = 0;
		n->run_pid = 0;
		n->total_pid = 1;
		n->last_pid = initpid;
		n->cfd = cfd;
		insert_node(&n, hash);
	}
	a = n->avenrun[0] + (FIXED_1 / 200);
	b = n->avenrun[1] + (FIXED_1 / 200);
	c = n->avenrun[2] + (FIXED_1 / 200);
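	/*
	 * FIXED_1/200 adds 0.005 for rounding to two decimals, as the
	 * kernel's fs/proc/loadavg.c does. The line below mirrors the host's
	 * /proc/loadavg layout, e.g. "0.08 0.03 0.01 1/4 127": three load
	 * averages, running/total task counts, and the largest pid seen in
	 * the cgroup.
	 */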
	total_len = snprintf(d->buf, d->buflen,
			     "%" PRIu64 ".%02" PRIu64 " "
			     "%" PRIu64 ".%02" PRIu64 " "
			     "%" PRIu64 ".%02" PRIu64 " "
			     "%u/%u %u\n",
			     LOAD_INT(a), LOAD_FRAC(a),
			     LOAD_INT(b), LOAD_FRAC(b),
			     LOAD_INT(c), LOAD_FRAC(c),
			     n->run_pid, n->total_pid, n->last_pid);
	pthread_rwlock_unlock(&load_hash[hash].rdlock);
	if (total_len < 0 || total_len >= d->buflen)
		return log_error(0, "Failed to write to cache");

	d->size = (int)total_len;
	d->cached = 1;

	if (total_len > size)
		total_len = size;

	memcpy(buf, d->buf, total_len);
	return total_len;
}

/*
 * Find the process pids under a cgroup path,
 * e.g. read /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find them.
 * @pid_buf : the array the pids are stored in.
 * @dpath : the cgroup path, e.g. /docker/containerid or /docker/containerid/child-cgroup ...
 * @depth : the remaining cgroup depth to descend.
 * @sum : the number of pids collected so far; the updated count is returned.
 * @cfd : the file descriptor of the mounted cgroup, e.g. /sys/fs/cgroup/cpu.
 */
static int calc_pid(char ***pid_buf, const char *dpath, int depth, int sum, int cfd)
{
	__do_free char *line = NULL, *path = NULL;
	__do_free void *fdopen_cache = NULL;
	__do_close int fd = -EBADF;
	__do_fclose FILE *f = NULL;
	__do_closedir DIR *dir = NULL;
	struct dirent *file;
	size_t linelen = 0;
	int pd;
	char **pid;

	/* path = dpath + "/cgroup.procs" + '\0' */
	path = malloc(strlen(dpath) + 20);
	if (!path)
		return sum;

	strcpy(path, dpath);
	fd = openat(cfd, path, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return sum;

	dir = fdopendir(fd);
	if (!dir)
		return sum;
	/* Transfer ownership to fdopendir(). */
	move_fd(fd);

	while (((file = readdir(dir)) != NULL) && depth > 0) {
		if (strcmp(file->d_name, ".") == 0)
			continue;

		if (strcmp(file->d_name, "..") == 0)
			continue;

		if (file->d_type == DT_DIR) {
			__do_free char *path_dir = NULL;
			int ret;

			/* path + '/' + d_name + '\0' */
			ret = asprintf(&path_dir, "%s/%s", path, file->d_name);
			if (ret < 0) {
				path_dir = NULL;
				return sum;
			}

			pd = depth - 1;
			sum = calc_pid(pid_buf, path_dir, pd, sum, cfd);
		}
	}

	strcat(path, "/cgroup.procs");
	fd = openat(cfd, path, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return sum;

	f = fdopen_cached(fd, "re", &fdopen_cache);
	if (!f)
		return sum;

	while (getline(&line, &linelen, f) != -1) {
		pid = realloc(*pid_buf, sizeof(char *) * (sum + 1));
		if (!pid)
			return sum;
		*pid_buf = pid;

		*(*pid_buf + sum) = malloc(strlen(line) + 1);
		if (!*(*pid_buf + sum))
			return sum;

		strcpy(*(*pid_buf + sum), line);
		sum++;
	}

	return sum;
}
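
/*
 * Illustrative note: with DEPTH_DIR = 3 and dpath "/docker/abc" (a made-up
 * path), calc_pid() reads /docker/abc/cgroup.procs plus the cgroup.procs of
 * child cgroups up to three levels deep; each stored pid string still ends
 * in '\n', which refresh_load() strips before use.
 */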

/*
 * calc_load calculates the load according to the following formula:
 * load1 = load0 * exp + active * (1 - exp)
 *
 * @load1: the new loadavg (the return value).
 * @load0: the previous loadavg (the load parameter).
 * @active: the total number of running pids at this moment.
 * @exp: the fixed-point decay factor defined at the top of this file.
 */
static uint64_t calc_load(uint64_t load, uint64_t exp, uint64_t active)
{
	uint64_t newload;

	active = active > 0 ? active * FIXED_1 : 0;
	newload = load * exp + active * (FIXED_1 - exp);
	if (active >= load)
		newload += FIXED_1 - 1;

	return newload / FIXED_1;
}
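
/*
 * Worked example: starting from load == 0 with one running pid, active
 * becomes FIXED_1 = 2048, and with the one-minute decay EXP_1 = 1884:
 * newload = (0 * 1884 + 2048 * (2048 - 1884) + 2047) / 2048 = 164, which
 * LOAD_INT()/LOAD_FRAC() render as "0.08". Repeated every FLUSH_TIME
 * seconds this converges towards 1.00, mirroring the kernel's calc_load()
 * in kernel/sched/loadavg.c.
 */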

/*
 * Return 0 if the container p->cg has been closed.
 * Return -1 if an error occurred during refresh.
 * A positive number equals the total number of pids.
 */
static int refresh_load(struct load_node *p, const char *path)
{
	char **idbuf = NULL;
	char proc_path[STRLITERALLEN("/proc//task//status") +
		       2 * INTTYPE_TO_STRLEN(pid_t) + 1];
	int i, ret, run_pid = 0, total_pid = 0, last_pid = 0;
	size_t linelen = 0;
	int sum, length;
	struct dirent *file;

	idbuf = must_realloc(NULL, sizeof(char **));

	sum = calc_pid(&idbuf, path, DEPTH_DIR, 0, p->cfd);
	if (!sum)
		goto out;

	for (i = 0; i < sum; i++) {
		__do_closedir DIR *dp = NULL;

		length = strlen(idbuf[i]) - 1;
		idbuf[i][length] = '\0';

		ret = snprintf(proc_path, sizeof(proc_path), "/proc/%s/task", idbuf[i]);
		if (ret < 0 || (size_t)ret >= sizeof(proc_path)) {
			i = sum;
			sum = -1;
			lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
			goto err_out;
		}

		dp = opendir(proc_path);
		if (!dp) {
			lxcfs_error("Failed to open \"%s\"", proc_path);
			continue;
		}

		while ((file = readdir(dp)) != NULL) {
			__do_free char *line = NULL;
			__do_fclose FILE *f = NULL;

			if (strcmp(file->d_name, ".") == 0)
				continue;

			if (strcmp(file->d_name, "..") == 0)
				continue;

			total_pid++;

			/* We make the biggest pid become last_pid. */
			ret = atoi(file->d_name);
			last_pid = (ret > last_pid) ? ret : last_pid;

			ret = snprintf(proc_path, sizeof(proc_path),
				       "/proc/%s/task/%s/status", idbuf[i], file->d_name);
			if (ret < 0 || (size_t)ret >= sizeof(proc_path)) {
				i = sum;
				sum = -1;
				lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
				goto err_out;
			}

			f = fopen(proc_path, "re");
			if (!f)
				continue;

			while (getline(&line, &linelen, f) != -1)
				if ((line[0] == 'S') && (line[1] == 't'))
					break;

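			/*
			 * line now holds the "State:" entry, e.g.
			 * "State:\tR (running)"; the state letter sits at
			 * index 7, after the seven bytes of "State:\t".
			 * Tasks in R (running) or D (uninterruptible sleep)
			 * count as active, as in the kernel's loadavg.
			 */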
			if (line && strlen(line) > 7 &&
			    ((line[7] == 'R') || (line[7] == 'D')))
				run_pid++;
		}
	}

	/* Calculate the loadavg. */
	p->avenrun[0] = calc_load(p->avenrun[0], EXP_1, run_pid);
	p->avenrun[1] = calc_load(p->avenrun[1], EXP_5, run_pid);
	p->avenrun[2] = calc_load(p->avenrun[2], EXP_15, run_pid);
	p->run_pid = run_pid;
	p->total_pid = total_pid;
	p->last_pid = last_pid;

err_out:
	for (; i > 0; i--)
		free(idbuf[i - 1]);
out:
	free(idbuf);
	return sum;
}

/* Delete the load_node n and return the node after it. */
static struct load_node *del_node(struct load_node *n, int locate)
{
	struct load_node *g;

	pthread_rwlock_wrlock(&load_hash[locate].rdlock);
	if (n->next == NULL) {
		*(n->pre) = NULL;
	} else {
		*(n->pre) = n->next;
		n->next->pre = n->pre;
	}
	g = n->next;
	free_disarm(n->cg);
	free_disarm(n);
	pthread_rwlock_unlock(&load_hash[locate].rdlock);
	return g;
}

/*
 * Traverse the hash table and update it.
 */
static void *load_begin(void *arg)
{
	int first_node, sum;
	struct load_node *f;
	clock_t time1, time2;

	for (;;) {
		if (loadavg_stop == 1)
			return NULL;

		time1 = clock();
		for (int i = 0; i < LOAD_SIZE; i++) {
			pthread_mutex_lock(&load_hash[i].lock);
			if (load_hash[i].next == NULL) {
				pthread_mutex_unlock(&load_hash[i].lock);
				continue;
			}

			f = load_hash[i].next;
			first_node = 1;
			while (f) {
				__do_free char *path = NULL;

				path = must_make_path_relative(f->cg, NULL);

				sum = refresh_load(f, path);
				if (sum == 0)
					f = del_node(f, i);
				else
					f = f->next;

				/* load_hash[i].lock is only held while the first node is processed. */
				if (first_node == 1) {
					first_node = 0;
					pthread_mutex_unlock(&load_hash[i].lock);
				}
			}
		}

		if (loadavg_stop == 1)
			return NULL;

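		/*
		 * Note: clock() measures CPU time used by this process, not
		 * wall time, so time this thread spends blocked barely
		 * advances it and the sleep below stays close to FLUSH_TIME.
		 */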
		time2 = clock();
		int sleep_time = FLUSH_TIME * 1000000 -
				 (int)((time2 - time1) * 1000000 / CLOCKS_PER_SEC);
		/* Don't hand usleep() a negative interval if a pass overran. */
		if (sleep_time > 0)
			usleep(sleep_time);
	}
}

/*
 * init_load initializes the hash table.
 * Return 0 on success, return -1 on failure.
 */
static int init_load(void)
{
	int i;
	int ret;

	for (i = 0; i < LOAD_SIZE; i++) {
		load_hash[i].next = NULL;
		ret = pthread_mutex_init(&load_hash[i].lock, NULL);
		if (ret) {
			lxcfs_error("Failed to initialize lock");
			goto out3;
		}

		ret = pthread_rwlock_init(&load_hash[i].rdlock, NULL);
		if (ret) {
			lxcfs_error("Failed to initialize rdlock");
			goto out2;
		}

		ret = pthread_rwlock_init(&load_hash[i].rilock, NULL);
		if (ret) {
			lxcfs_error("Failed to initialize rilock");
			goto out1;
		}
	}

	return 0;

out1:
	pthread_rwlock_destroy(&load_hash[i].rdlock);
out2:
	pthread_mutex_destroy(&load_hash[i].lock);
out3:
	while (i > 0) {
		i--;
		pthread_mutex_destroy(&load_hash[i].lock);
		pthread_rwlock_destroy(&load_hash[i].rdlock);
		pthread_rwlock_destroy(&load_hash[i].rilock);
	}

	return -1;
}

static void load_free(void)
{
	struct load_node *f, *p;

	for (int i = 0; i < LOAD_SIZE; i++) {
		pthread_mutex_lock(&load_hash[i].lock);
		pthread_rwlock_wrlock(&load_hash[i].rilock);
		pthread_rwlock_wrlock(&load_hash[i].rdlock);
		if (load_hash[i].next == NULL) {
			pthread_mutex_unlock(&load_hash[i].lock);
			pthread_mutex_destroy(&load_hash[i].lock);
			pthread_rwlock_unlock(&load_hash[i].rilock);
			pthread_rwlock_destroy(&load_hash[i].rilock);
			pthread_rwlock_unlock(&load_hash[i].rdlock);
			pthread_rwlock_destroy(&load_hash[i].rdlock);
			continue;
		}

		for (f = load_hash[i].next; f;) {
			free_disarm(f->cg);
			p = f->next;
			free_disarm(f);
			f = p;
		}

		pthread_mutex_unlock(&load_hash[i].lock);
		pthread_mutex_destroy(&load_hash[i].lock);
		pthread_rwlock_unlock(&load_hash[i].rilock);
		pthread_rwlock_destroy(&load_hash[i].rilock);
		pthread_rwlock_unlock(&load_hash[i].rdlock);
		pthread_rwlock_destroy(&load_hash[i].rdlock);
	}
}

/* Return a positive number on success, return 0 on failure. */
pthread_t load_daemon(int load_use)
{
	int ret;
	pthread_t pid;

	ret = init_load();
	if (ret == -1)
		return log_error(0, "Failed to initialize hash table in load_daemon!");

	ret = pthread_create(&pid, NULL, load_begin, NULL);
	if (ret != 0) {
		load_free();
		return log_error(0, "Failed to create thread in load_daemon!");
	}

	/* Use the virtualized loadavg; load_use is 1 here. */
	loadavg = load_use;
	return pid;
}

/* Returns 0 on success. */
int stop_load_daemon(pthread_t pid)
{
	int s;

	/* Signal the thread to stop gracefully. */
	loadavg_stop = 1;

	s = pthread_join(pid, NULL); /* Make sure the thread has terminated. */
	if (s)
		return log_error(-1, "stop_load_daemon error: failed to join");

	load_free();
	loadavg_stop = 0;

	return 0;
}