/* src/proc_loadavg.c — lxcfs (mirrored at git.proxmox.com, mirror_lxcfs) */
1 /* SPDX-License-Identifier: LGPL-2.1+ */
7 #ifndef FUSE_USE_VERSION
8 #define FUSE_USE_VERSION 26
11 #define _FILE_OFFSET_BITS 64
13 #define __STDC_FORMAT_MACROS
31 #include <linux/magic.h>
32 #include <linux/sched.h>
33 #include <sys/epoll.h>
35 #include <sys/mount.h>
36 #include <sys/param.h>
37 #include <sys/socket.h>
38 #include <sys/syscall.h>
39 #include <sys/sysinfo.h>
44 #include "cgroup_fuse.h"
45 #include "cgroups/cgroup.h"
46 #include "cgroups/cgroup_utils.h"
47 #include "memory_utils.h"
/*
 * This parameter is used for proc_loadavg_read().
 * 1 means use loadavg, 0 means not use.
 */
static int loadavg = 0;

/* The function of hash table.*/
#define LOAD_SIZE 100	/* the size of hash_table */
#define FLUSH_TIME 5	/* the flush rate */
#define DEPTH_DIR 3	/* the depth of per cgroup */

/* The function of calculate loadavg .*/
#define FSHIFT 11			/* nr of bits of precision */
#define FIXED_1 (1 << FSHIFT)		/* 1.0 as fixed-point */
#define EXP_1 1884			/* 1/exp(5sec/1min) as fixed-point */
#define EXP_5 2014			/* 1/exp(5sec/5min) */
#define EXP_15 2037			/* 1/exp(5sec/15min) */
/* Integer part of a fixed-point load value. */
#define LOAD_INT(x) ((x) >> FSHIFT)
/* Two-digit fractional part (hundredths) of a fixed-point load value. */
#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

/* Set to 1 to ask the background refresh thread (load_begin) to exit. */
static volatile sig_atomic_t loadavg_stop = 0;
76 unsigned int total_pid
;
77 unsigned int last_pid
;
78 /* The file descriptor of the mounted cgroup */
80 struct load_node
*next
;
81 struct load_node
**pre
;
86 * The lock is about insert load_node and refresh load_node.To the first
87 * load_node of each hash bucket, insert and refresh in this hash bucket is
92 * The rdlock is about read loadavg and delete load_node.To each hash
93 * bucket, read and delete is mutually exclusive. But at the same time, we
94 * allow paratactic read operation. This rdlock is at list level.
96 pthread_rwlock_t rdlock
;
98 * The rilock is about read loadavg and insert load_node.To the first
99 * load_node of each hash bucket, read and insert is mutually exclusive.
100 * But at the same time, we allow paratactic read operation.
102 pthread_rwlock_t rilock
;
103 struct load_node
*next
;
106 static struct load_head load_hash
[LOAD_SIZE
]; /* hash table */
109 * locate_node() finds special node. Not return NULL means success.
110 * It should be noted that rdlock isn't unlocked at the end of code
111 * because this function is used to read special node. Delete is not
112 * allowed before read has ended.
113 * unlock rdlock only in proc_loadavg_read().
115 static struct load_node
*locate_node(char *cg
, int locate
)
117 struct load_node
*f
= NULL
;
120 pthread_rwlock_rdlock(&load_hash
[locate
].rilock
);
121 pthread_rwlock_rdlock(&load_hash
[locate
].rdlock
);
122 if (load_hash
[locate
].next
== NULL
) {
123 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
126 f
= load_hash
[locate
].next
;
127 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
128 while (f
&& ((i
= strcmp(f
->cg
, cg
)) != 0))
133 static void insert_node(struct load_node
**n
, int locate
)
137 pthread_mutex_lock(&load_hash
[locate
].lock
);
138 pthread_rwlock_wrlock(&load_hash
[locate
].rilock
);
139 f
= load_hash
[locate
].next
;
140 load_hash
[locate
].next
= *n
;
142 (*n
)->pre
= &(load_hash
[locate
].next
);
144 f
->pre
= &((*n
)->next
);
146 pthread_mutex_unlock(&load_hash
[locate
].lock
);
147 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
/*
 * ELF hash of a cgroup name, masked to a non-negative int.
 * The caller reduces the result modulo LOAD_SIZE to pick a bucket.
 */
int calc_hash(const char *name)
{
	unsigned int hash = 0;
	unsigned int x = 0;

	/* ELFHash algorithm. */
	while (*name) {
		hash = (hash << 4) + *name++;
		x = hash & 0xf0000000;
		/* Fold the top nibble back in so long names keep mixing. */
		if (x != 0)
			hash ^= (x >> 24);
		hash &= ~x;
	}

	return (hash & 0x7fffffff);
}
167 int proc_loadavg_read(char *buf
, size_t size
, off_t offset
,
168 struct fuse_file_info
*fi
)
170 __do_free
char *cg
= NULL
;
171 struct fuse_context
*fc
= fuse_get_context();
172 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
174 ssize_t total_len
= 0;
175 char *cache
= d
->buf
;
184 if (offset
> d
->size
)
190 left
= d
->size
- offset
;
191 total_len
= left
> size
? size
: left
;
192 memcpy(buf
, cache
+ offset
, total_len
);
197 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
199 initpid
= lookup_initpid_in_store(fc
->pid
);
200 if (initpid
<= 1 || is_shared_pidns(initpid
))
203 cg
= get_pid_cgroup(initpid
, "cpu");
205 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
207 prune_init_slice(cg
);
208 hash
= calc_hash(cg
) % LOAD_SIZE
;
209 n
= locate_node(cg
, hash
);
213 cfd
= get_cgroup_fd("cpu");
216 * In locate_node() above, pthread_rwlock_unlock() isn't used
217 * because delete is not allowed before read has ended.
219 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
220 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
224 n
= malloc(sizeof(struct load_node
));
228 n
->cg
= malloc(strlen(cg
) + 1);
237 n
->last_pid
= initpid
;
239 insert_node(&n
, hash
);
241 a
= n
->avenrun
[0] + (FIXED_1
/ 200);
242 b
= n
->avenrun
[1] + (FIXED_1
/ 200);
243 c
= n
->avenrun
[2] + (FIXED_1
/ 200);
244 total_len
= snprintf(d
->buf
, d
->buflen
,
260 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
261 if (total_len
< 0 || total_len
>= d
->buflen
)
262 return log_error(0, "Failed to write to cache");
264 d
->size
= (int)total_len
;
267 if (total_len
> size
)
270 memcpy(buf
, d
->buf
, total_len
);
275 * Find the process pid from cgroup path.
276 * eg:from /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find the process pid.
277 * @pid_buf : put pid to pid_buf.
278 * @dpath : the path of cgroup. eg: /docker/containerid or /docker/containerid/child-cgroup ...
279 * @depth : the depth of cgroup in container.
280 * @sum : return the number of pid.
281 * @cfd : the file descriptor of the mounted cgroup. eg: /sys/fs/cgroup/cpu
283 static int calc_pid(char ***pid_buf
, const char *dpath
, int depth
, int sum
, int cfd
)
285 __do_free
char *line
= NULL
, *path
= NULL
;
286 __do_free
void *fdopen_cache
= NULL
;
287 __do_close
int fd
= -EBADF
;
288 __do_fclose
FILE *f
= NULL
;
289 __do_closedir
DIR *dir
= NULL
;
295 /* path = dpath + "/cgroup.procs" + /0 */
296 path
= malloc(strlen(dpath
) + 20);
301 fd
= openat(cfd
, path
, O_RDONLY
| O_CLOEXEC
);
308 /* Transfer ownership to fdopendir(). */
311 while (((file
= readdir(dir
)) != NULL
) && depth
> 0) {
312 if (strcmp(file
->d_name
, ".") == 0)
315 if (strcmp(file
->d_name
, "..") == 0)
318 if (file
->d_type
== DT_DIR
) {
319 __do_free
char *path_dir
= NULL
;
322 /* path + '/' + d_name +/0 */
323 ret
= asprintf(&path_dir
, "%s/%s", path
, file
->d_name
);
330 sum
= calc_pid(pid_buf
, path_dir
, pd
, sum
, cfd
);
334 strcat(path
, "/cgroup.procs");
335 fd
= openat(cfd
, path
, O_RDONLY
| O_CLOEXEC
);
339 f
= fdopen_cached(fd
, "re", &fdopen_cache
);
343 while (getline(&line
, &linelen
, f
) != -1) {
344 pid
= realloc(*pid_buf
, sizeof(char *) * (sum
+ 1));
349 *(*pid_buf
+ sum
) = malloc(strlen(line
) + 1);
350 if (!*(*pid_buf
+ sum
))
353 strcpy(*(*pid_buf
+ sum
), line
);
361 * calc_load calculates the load according to the following formula:
362 * load1 = load0 * exp + active * (1 - exp)
364 * @load1: the new loadavg.
365 * @load0: the former loadavg.
366 * @active: the total number of running pid at this moment.
367 * @exp: the fixed-point defined in the beginning.
369 static uint64_t calc_load(uint64_t load
, uint64_t exp
, uint64_t active
)
373 active
= active
> 0 ? active
* FIXED_1
: 0;
374 newload
= load
* exp
+ active
* (FIXED_1
- exp
);
376 newload
+= FIXED_1
- 1;
378 return newload
/ FIXED_1
;
382 * Return 0 means that container p->cg is closed.
383 * Return -1 means that error occurred in refresh.
384 * Positive num equals the total number of pid.
386 static int refresh_load(struct load_node
*p
, const char *path
)
389 char proc_path
[STRLITERALLEN("/proc//task//status") +
390 2 * INTTYPE_TO_STRLEN(pid_t
) + 1];
391 int i
, ret
, run_pid
= 0, total_pid
= 0, last_pid
= 0;
396 idbuf
= must_realloc(NULL
, sizeof(char **));
398 sum
= calc_pid(&idbuf
, path
, DEPTH_DIR
, 0, p
->cfd
);
402 for (i
= 0; i
< sum
; i
++) {
403 __do_closedir
DIR *dp
= NULL
;
405 length
= strlen(idbuf
[i
]) - 1;
406 idbuf
[i
][length
] = '\0';
408 ret
= snprintf(proc_path
, sizeof(proc_path
), "/proc/%s/task", idbuf
[i
]);
409 if (ret
< 0 || (size_t)ret
> sizeof(proc_path
)) {
412 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
416 dp
= opendir(proc_path
);
418 lxcfs_error("Failed to open \"%s\"", proc_path
);
422 while ((file
= readdir(dp
)) != NULL
) {
423 __do_free
char *line
= NULL
;
424 __do_fclose
FILE *f
= NULL
;
426 if (strcmp(file
->d_name
, ".") == 0)
429 if (strcmp(file
->d_name
, "..") == 0)
434 /* We make the biggest pid become last_pid. */
435 ret
= atof(file
->d_name
);
436 last_pid
= (ret
> last_pid
) ? ret
: last_pid
;
438 ret
= snprintf(proc_path
, sizeof(proc_path
),
439 "/proc/%s/task/%s/status", idbuf
[i
], file
->d_name
);
440 if (ret
< 0 || (size_t)ret
> sizeof(proc_path
)) {
443 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
447 f
= fopen(proc_path
, "re");
451 while (getline(&line
, &linelen
, f
) != -1)
452 if ((line
[0] == 'S') && (line
[1] == 't'))
455 if ((line
[7] == 'R') || (line
[7] == 'D'))
460 /* Calculate the loadavg. */
461 p
->avenrun
[0] = calc_load(p
->avenrun
[0], EXP_1
, run_pid
);
462 p
->avenrun
[1] = calc_load(p
->avenrun
[1], EXP_5
, run_pid
);
463 p
->avenrun
[2] = calc_load(p
->avenrun
[2], EXP_15
, run_pid
);
464 p
->run_pid
= run_pid
;
465 p
->total_pid
= total_pid
;
466 p
->last_pid
= last_pid
;
476 /* Delete the load_node n and return the next node of it. */
477 static struct load_node
*del_node(struct load_node
*n
, int locate
)
481 pthread_rwlock_wrlock(&load_hash
[locate
].rdlock
);
482 if (n
->next
== NULL
) {
486 n
->next
->pre
= n
->pre
;
491 pthread_rwlock_unlock(&load_hash
[locate
].rdlock
);
496 * Traverse the hash table and update it.
498 static void *load_begin(void *arg
)
503 clock_t time1
, time2
;
506 if (loadavg_stop
== 1)
510 for (int i
= 0; i
< LOAD_SIZE
; i
++) {
511 pthread_mutex_lock(&load_hash
[i
].lock
);
512 if (load_hash
[i
].next
== NULL
) {
513 pthread_mutex_unlock(&load_hash
[i
].lock
);
517 f
= load_hash
[i
].next
;
520 __do_free
char *path
= NULL
;
522 path
= must_make_path_relative(f
->cg
, NULL
);
524 sum
= refresh_load(f
, path
);
530 /* load_hash[i].lock locks only on the first node.*/
531 if (first_node
== 1) {
533 pthread_mutex_unlock(&load_hash
[i
].lock
);
538 if (loadavg_stop
== 1)
542 usleep(FLUSH_TIME
* 1000000 -
543 (int)((time2
- time1
) * 1000000 / CLOCKS_PER_SEC
));
548 * init_load initialize the hash table.
549 * Return 0 on success, return -1 on failure.
551 static int init_load(void)
556 for (i
= 0; i
< LOAD_SIZE
; i
++) {
557 load_hash
[i
].next
= NULL
;
558 ret
= pthread_mutex_init(&load_hash
[i
].lock
, NULL
);
560 lxcfs_error("Failed to initialize lock");
564 ret
= pthread_rwlock_init(&load_hash
[i
].rdlock
, NULL
);
566 lxcfs_error("Failed to initialize rdlock");
570 ret
= pthread_rwlock_init(&load_hash
[i
].rilock
, NULL
);
572 lxcfs_error("Failed to initialize rilock");
580 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
582 pthread_mutex_destroy(&load_hash
[i
].lock
);
586 pthread_mutex_destroy(&load_hash
[i
].lock
);
587 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
588 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
594 static void load_free(void)
596 struct load_node
*f
, *p
;
598 for (int i
= 0; i
< LOAD_SIZE
; i
++) {
599 pthread_mutex_lock(&load_hash
[i
].lock
);
600 pthread_rwlock_wrlock(&load_hash
[i
].rilock
);
601 pthread_rwlock_wrlock(&load_hash
[i
].rdlock
);
602 if (load_hash
[i
].next
== NULL
) {
603 pthread_mutex_unlock(&load_hash
[i
].lock
);
604 pthread_mutex_destroy(&load_hash
[i
].lock
);
605 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
606 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
607 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
608 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
612 for (f
= load_hash
[i
].next
; f
;) {
619 pthread_mutex_unlock(&load_hash
[i
].lock
);
620 pthread_mutex_destroy(&load_hash
[i
].lock
);
621 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
622 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
623 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
624 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
628 /* Return a positive number on success, return 0 on failure.*/
629 pthread_t
load_daemon(int load_use
)
636 return log_error(0, "Initialize hash_table fails in load_daemon!");
638 ret
= pthread_create(&pid
, NULL
, load_begin
, NULL
);
641 return log_error(0, "Create pthread fails in load_daemon!");
644 /* use loadavg, here loadavg = 1*/
649 /* Returns 0 on success. */
650 int stop_load_daemon(pthread_t pid
)
654 /* Signal the thread to gracefully stop */
657 s
= pthread_join(pid
, NULL
); /* Make sure sub thread has been canceled. */
659 return log_error(-1, "stop_load_daemon error: failed to join");