]>
git.proxmox.com Git - mirror_lxcfs.git/blob - src/proc_loadavg.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
21 #include <linux/magic.h>
22 #include <linux/sched.h>
23 #include <sys/epoll.h>
25 #include <sys/mount.h>
26 #include <sys/param.h>
27 #include <sys/socket.h>
28 #include <sys/syscall.h>
29 #include <sys/sysinfo.h>
32 #include "proc_loadavg.h"
35 #include "cgroup_fuse.h"
36 #include "cgroups/cgroup.h"
37 #include "cgroups/cgroup_utils.h"
38 #include "memory_utils.h"
42 * This parameter is used for proc_loadavg_read().
43 * 1 means use loadavg, 0 means not use.
45 static int loadavg
= 0;
47 /* The function of hash table.*/
48 #define LOAD_SIZE 100 /*the size of hash_table */
49 #define FLUSH_TIME 5 /*the flush rate */
50 #define DEPTH_DIR 3 /*the depth of per cgroup */
51 /* The function of calculate loadavg .*/
52 #define FSHIFT (uint64_t)11 /* nr of bits of precision */
53 #define FIXED_1 ((uint64_t)1 << FSHIFT) /* 1.0 as fixed-point */
54 #define EXP_1 (uint64_t)1884 /* 1/exp(5sec/1min) as fixed-point */
55 #define EXP_5 (uint64_t)2014 /* 1/exp(5sec/5min) */
56 #define EXP_15 (uint64_t)2037 /* 1/exp(5sec/15min) */
57 #define LOAD_INT(x) ((x) >> FSHIFT)
58 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * (uint64_t)100)
59 static volatile sig_atomic_t loadavg_stop
= 0;
67 unsigned int total_pid
;
68 unsigned int last_pid
;
69 /* The file descriptor of the mounted cgroup */
71 struct load_node
*next
;
72 struct load_node
**pre
;
77 * The lock is about insert load_node and refresh load_node.To the first
78 * load_node of each hash bucket, insert and refresh in this hash bucket is
83 * The rdlock is about read loadavg and delete load_node.To each hash
84 * bucket, read and delete is mutually exclusive. But at the same time, we
85 * allow paratactic read operation. This rdlock is at list level.
87 pthread_rwlock_t rdlock
;
89 * The rilock is about read loadavg and insert load_node.To the first
90 * load_node of each hash bucket, read and insert is mutually exclusive.
91 * But at the same time, we allow paratactic read operation.
93 pthread_rwlock_t rilock
;
94 struct load_node
*next
;
97 static struct load_head load_hash
[LOAD_SIZE
]; /* hash table */
100 * locate_node() finds special node. Not return NULL means success.
101 * It should be noted that rdlock isn't unlocked at the end of code
102 * because this function is used to read special node. Delete is not
103 * allowed before read has ended.
104 * unlock rdlock only in proc_loadavg_read().
106 static struct load_node
*locate_node(char *cg
, int locate
)
108 struct load_node
*f
= NULL
;
111 pthread_rwlock_rdlock(&load_hash
[locate
].rilock
);
112 pthread_rwlock_rdlock(&load_hash
[locate
].rdlock
);
113 if (load_hash
[locate
].next
== NULL
) {
114 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
117 f
= load_hash
[locate
].next
;
118 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
119 while (f
&& ((i
= strcmp(f
->cg
, cg
)) != 0))
124 static void insert_node(struct load_node
**n
, int locate
)
128 pthread_mutex_lock(&load_hash
[locate
].lock
);
129 pthread_rwlock_wrlock(&load_hash
[locate
].rilock
);
130 f
= load_hash
[locate
].next
;
131 load_hash
[locate
].next
= *n
;
133 (*n
)->pre
= &(load_hash
[locate
].next
);
135 f
->pre
= &((*n
)->next
);
137 pthread_mutex_unlock(&load_hash
[locate
].lock
);
138 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
/*
 * calc_hash() hashes the NUL-terminated string @name with the ELF hash
 * algorithm.
 *
 * @name : string to hash; must not be NULL.
 *
 * Returns the hash masked to 31 bits, so the result is never negative. An
 * empty string hashes to 0. Callers reduce the value modulo the table size
 * themselves.
 *
 * NOTE(review): this block was restored from a damaged listing; the loop
 * header and the high-nibble fold were filled in from the standard ELF hash.
 * Confirm against the upstream file.
 */
int calc_hash(const char *name)
{
	unsigned int hash = 0;
	unsigned int x = 0;

	/* ELFHash algorithm. */
	while (*name) {
		hash = (hash << 4) + *name++;
		/* Fold the top nibble back into the low bits, then clear it. */
		x = hash & 0xf0000000;
		if (x != 0)
			hash ^= (x >> 24);
		hash &= ~x;
	}

	return (hash & 0x7fffffff);
}
158 int proc_loadavg_read(char *buf
, size_t size
, off_t offset
,
159 struct fuse_file_info
*fi
)
161 __do_free
char *cg
= NULL
;
162 struct fuse_context
*fc
= fuse_get_context();
163 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
165 ssize_t total_len
= 0;
174 if (offset
> d
->size
)
180 left
= d
->size
- offset
;
181 total_len
= left
> size
? size
: left
;
182 memcpy(buf
, d
->buf
+ offset
, total_len
);
187 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
189 initpid
= lookup_initpid_in_store(fc
->pid
);
190 if (initpid
<= 1 || is_shared_pidns(initpid
))
193 cg
= get_pid_cgroup(initpid
, "cpu");
195 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
197 prune_init_slice(cg
);
198 hash
= calc_hash(cg
) % LOAD_SIZE
;
199 n
= locate_node(cg
, hash
);
203 cfd
= get_cgroup_fd("cpu");
206 * In locate_node() above, pthread_rwlock_unlock() isn't used
207 * because delete is not allowed before read has ended.
209 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
210 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
213 n
= must_realloc(NULL
, sizeof(struct load_node
));
214 n
->cg
= move_ptr(cg
);
220 n
->last_pid
= initpid
;
222 insert_node(&n
, hash
);
224 a
= n
->avenrun
[0] + (FIXED_1
/ 200);
225 b
= n
->avenrun
[1] + (FIXED_1
/ 200);
226 c
= n
->avenrun
[2] + (FIXED_1
/ 200);
227 total_len
= snprintf(d
->buf
, d
->buflen
,
228 "%" PRIu64
".%02" PRIu64
" "
230 "%" PRIu64
".%02" PRIu64
" "
243 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
244 if (total_len
< 0 || total_len
>= d
->buflen
)
245 return log_error(0, "Failed to write to cache");
247 d
->size
= (int)total_len
;
250 if ((size_t)total_len
> size
)
253 memcpy(buf
, d
->buf
, total_len
);
/*
 * Find the pids of the processes that belong to a cgroup path.
 * e.g. read /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find the pids.
 * @pid_buf : buffer that receives the pids.
 * @rel_path : the path of the cgroup, e.g. /docker/containerid or /docker/containerid/child-cgroup ...
 * @depth : the depth of the cgroup in the container.
 * @sum : the number of pids; the updated count is returned.
 * @cfd : the file descriptor of the mounted cgroup, e.g. /sys/fs/cgroup/cpu
 */
266 static int calc_pid(char ***pid_buf
, const char *rel_path
, int depth
, int sum
, int cfd
)
268 __do_free
char *line
= NULL
, *path
= NULL
;
269 __do_free
void *fdopen_cache
= NULL
;
270 __do_close
int fd
= -EBADF
;
271 __do_fclose
FILE *f
= NULL
;
272 __do_closedir
DIR *dir
= NULL
;
277 fd
= openat(cfd
, rel_path
, O_RDONLY
| O_CLOEXEC
);
284 /* Transfer ownership to fdopendir(). */
287 while (((file
= readdir(dir
)) != NULL
) && depth
> 0) {
288 if (strcmp(file
->d_name
, ".") == 0)
291 if (strcmp(file
->d_name
, "..") == 0)
294 if (file
->d_type
== DT_DIR
) {
295 __do_free
char *path_next
= NULL
;
296 path_next
= must_make_path(rel_path
, "/", file
->d_name
, NULL
);
298 sum
= calc_pid(pid_buf
, path_next
, pd
, sum
, cfd
);
302 path
= must_make_path(rel_path
, "/cgroup.procs", NULL
);
303 fd
= openat(cfd
, path
, O_RDONLY
| O_CLOEXEC
);
307 f
= fdopen_cached(fd
, "re", &fdopen_cache
);
311 while (getline(&line
, &linelen
, f
) != -1) {
312 __do_free
char *task_pid
= NULL
;
315 task_pid
= strdup(line
);
319 pid
= realloc(*pid_buf
, sizeof(char *) * (sum
+ 1));
323 *(*pid_buf
+ sum
) = move_ptr(task_pid
);
331 * calc_load calculates the load according to the following formula:
332 * load1 = load0 * exp + active * (1 - exp)
334 * @load1: the new loadavg.
335 * @load0: the former loadavg.
336 * @active: the total number of running pid at this moment.
337 * @exp: the fixed-point defined in the beginning.
339 static uint64_t calc_load(uint64_t load
, uint64_t exp
, uint64_t active
)
343 active
= active
> 0 ? active
* FIXED_1
: 0;
344 newload
= load
* exp
+ active
* (FIXED_1
- exp
);
346 newload
+= FIXED_1
- 1;
348 return newload
/ FIXED_1
;
352 * Return 0 means that container p->cg is closed.
353 * Return -1 means that error occurred in refresh.
354 * Positive num equals the total number of pid.
356 static int refresh_load(struct load_node
*p
, const char *path
)
359 char proc_path
[STRLITERALLEN("/proc//task//status") +
360 2 * INTTYPE_TO_STRLEN(pid_t
) + 1];
361 int i
, ret
, run_pid
= 0, total_pid
= 0, last_pid
= 0;
366 idbuf
= must_realloc(NULL
, sizeof(char **));
368 sum
= calc_pid(&idbuf
, path
, DEPTH_DIR
, 0, p
->cfd
);
372 for (i
= 0; i
< sum
; i
++) {
373 __do_closedir
DIR *dp
= NULL
;
375 length
= strlen(idbuf
[i
]) - 1;
376 idbuf
[i
][length
] = '\0';
378 ret
= snprintf(proc_path
, sizeof(proc_path
), "/proc/%s/task", idbuf
[i
]);
379 if (ret
< 0 || (size_t)ret
> sizeof(proc_path
)) {
382 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
386 dp
= opendir(proc_path
);
388 lxcfs_error("Failed to open \"%s\"", proc_path
);
392 while ((file
= readdir(dp
)) != NULL
) {
393 __do_free
char *line
= NULL
;
394 __do_fclose
FILE *f
= NULL
;
396 if (strcmp(file
->d_name
, ".") == 0)
399 if (strcmp(file
->d_name
, "..") == 0)
404 /* We make the biggest pid become last_pid. */
405 ret
= atof(file
->d_name
);
406 last_pid
= (ret
> last_pid
) ? ret
: last_pid
;
408 ret
= snprintf(proc_path
, sizeof(proc_path
),
409 "/proc/%s/task/%s/status", idbuf
[i
], file
->d_name
);
410 if (ret
< 0 || (size_t)ret
> sizeof(proc_path
)) {
413 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
417 f
= fopen(proc_path
, "re");
421 while (getline(&line
, &linelen
, f
) != -1)
422 if ((line
[0] == 'S') && (line
[1] == 't'))
425 if ((line
[7] == 'R') || (line
[7] == 'D'))
430 /* Calculate the loadavg. */
431 p
->avenrun
[0] = calc_load(p
->avenrun
[0], EXP_1
, run_pid
);
432 p
->avenrun
[1] = calc_load(p
->avenrun
[1], EXP_5
, run_pid
);
433 p
->avenrun
[2] = calc_load(p
->avenrun
[2], EXP_15
, run_pid
);
434 p
->run_pid
= run_pid
;
435 p
->total_pid
= total_pid
;
436 p
->last_pid
= last_pid
;
446 /* Delete the load_node n and return the next node of it. */
447 static struct load_node
*del_node(struct load_node
*n
, int locate
)
451 pthread_rwlock_wrlock(&load_hash
[locate
].rdlock
);
452 if (n
->next
== NULL
) {
456 n
->next
->pre
= n
->pre
;
461 pthread_rwlock_unlock(&load_hash
[locate
].rdlock
);
466 * Traverse the hash table and update it.
468 static void *load_begin(void *arg
)
473 clock_t time1
, time2
;
476 if (loadavg_stop
== 1)
480 for (int i
= 0; i
< LOAD_SIZE
; i
++) {
481 pthread_mutex_lock(&load_hash
[i
].lock
);
482 if (load_hash
[i
].next
== NULL
) {
483 pthread_mutex_unlock(&load_hash
[i
].lock
);
487 f
= load_hash
[i
].next
;
490 __do_free
char *path
= NULL
;
492 path
= must_make_path_relative(f
->cg
, NULL
);
494 sum
= refresh_load(f
, path
);
500 /* load_hash[i].lock locks only on the first node.*/
501 if (first_node
== 1) {
503 pthread_mutex_unlock(&load_hash
[i
].lock
);
508 if (loadavg_stop
== 1)
512 usleep(FLUSH_TIME
* 1000000 -
513 (int)((time2
- time1
) * 1000000 / CLOCKS_PER_SEC
));
518 * init_load initialize the hash table.
519 * Return 0 on success, return -1 on failure.
521 static int init_load(void)
526 for (i
= 0; i
< LOAD_SIZE
; i
++) {
527 load_hash
[i
].next
= NULL
;
528 ret
= pthread_mutex_init(&load_hash
[i
].lock
, NULL
);
530 lxcfs_error("Failed to initialize lock");
534 ret
= pthread_rwlock_init(&load_hash
[i
].rdlock
, NULL
);
536 lxcfs_error("Failed to initialize rdlock");
540 ret
= pthread_rwlock_init(&load_hash
[i
].rilock
, NULL
);
542 lxcfs_error("Failed to initialize rilock");
550 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
552 pthread_mutex_destroy(&load_hash
[i
].lock
);
556 pthread_mutex_destroy(&load_hash
[i
].lock
);
557 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
558 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
564 static void load_free(void)
566 struct load_node
*f
, *p
;
568 for (int i
= 0; i
< LOAD_SIZE
; i
++) {
569 pthread_mutex_lock(&load_hash
[i
].lock
);
570 pthread_rwlock_wrlock(&load_hash
[i
].rilock
);
571 pthread_rwlock_wrlock(&load_hash
[i
].rdlock
);
572 if (load_hash
[i
].next
== NULL
) {
573 pthread_mutex_unlock(&load_hash
[i
].lock
);
574 pthread_mutex_destroy(&load_hash
[i
].lock
);
575 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
576 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
577 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
578 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
582 for (f
= load_hash
[i
].next
; f
;) {
589 pthread_mutex_unlock(&load_hash
[i
].lock
);
590 pthread_mutex_destroy(&load_hash
[i
].lock
);
591 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
592 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
593 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
594 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
598 /* Return a positive number on success, return 0 on failure.*/
599 pthread_t
load_daemon(int load_use
)
606 return log_error(0, "Initialize hash_table fails in load_daemon!");
608 ret
= pthread_create(&pid
, NULL
, load_begin
, NULL
);
611 return log_error(0, "Create pthread fails in load_daemon!");
614 /* use loadavg, here loadavg = 1*/
619 /* Returns 0 on success. */
620 int stop_load_daemon(pthread_t pid
)
624 /* Signal the thread to gracefully stop */
627 s
= pthread_join(pid
, NULL
); /* Make sure sub thread has been canceled. */
629 return log_error(-1, "stop_load_daemon error: failed to join");