]>
git.proxmox.com Git - mirror_lxcfs.git/blob - src/proc_loadavg.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
9 #define __STDC_FORMAT_MACROS
27 #include <linux/magic.h>
28 #include <linux/sched.h>
29 #include <sys/epoll.h>
31 #include <sys/mount.h>
32 #include <sys/param.h>
33 #include <sys/socket.h>
34 #include <sys/syscall.h>
35 #include <sys/sysinfo.h>
39 #include "cgroup_fuse.h"
40 #include "cgroups/cgroup.h"
41 #include "cgroups/cgroup_utils.h"
42 #include "memory_utils.h"
46 * This parameter is used for proc_loadavg_read().
47 * 1 means loadavg is used, 0 means it is not used.
49 static int loadavg
= 0;
51 /* Parameters of the per-cgroup load hash table. */
52 #define LOAD_SIZE 100 /* number of buckets in load_hash[] */
53 #define FLUSH_TIME 5 /* refresh period of the load daemon, in seconds */
54 #define DEPTH_DIR 3 /* depth passed to calc_pid() when walking a cgroup */
55 /* Fixed-point constants used to calculate loadavg. */
56 #define FSHIFT 11 /* nr of bits of precision */
57 #define FIXED_1 (1 << FSHIFT) /* 1.0 as fixed-point */
58 #define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
59 #define EXP_5 2014 /* 1/exp(5sec/5min) */
60 #define EXP_15 2037 /* 1/exp(5sec/15min) */
61 #define LOAD_INT(x) ((x) >> FSHIFT) /* integer part of a fixed-point value */
62 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) /* fractional part, scaled to two decimal digits */
63 static volatile sig_atomic_t loadavg_stop
= 0;
71 unsigned int total_pid
;
72 unsigned int last_pid
;
73 /* The file descriptor of the mounted cgroup */
75 struct load_node
*next
;
76 struct load_node
**pre
;
81 * The lock covers inserting and refreshing a load_node. For the first
82 * load_node of each hash bucket, insert and refresh in this hash bucket are
87 * The rdlock covers reading loadavg and deleting a load_node. Within each hash
88 * bucket, read and delete are mutually exclusive, but concurrent reads are
89 * allowed. This rdlock is at list level.
91 pthread_rwlock_t rdlock
;
93 * The rilock covers reading loadavg and inserting a load_node. For the first
94 * load_node of each hash bucket, read and insert are mutually exclusive,
95 * but concurrent reads are allowed.
97 pthread_rwlock_t rilock
;
98 struct load_node
*next
;
101 static struct load_head load_hash
[LOAD_SIZE
]; /* hash table */
104 * locate_node() finds the node whose cgroup matches cg. A non-NULL return means success.
105 * Note that rdlock is not unlocked at the end of this function
106 * because the function is used to read that node; deletion is not
107 * allowed before the read has ended.
108 * rdlock is unlocked only in proc_loadavg_read().
110 static struct load_node
*locate_node(char *cg
, int locate
)
112 struct load_node
*f
= NULL
;
115 pthread_rwlock_rdlock(&load_hash
[locate
].rilock
);
116 pthread_rwlock_rdlock(&load_hash
[locate
].rdlock
);
117 if (load_hash
[locate
].next
== NULL
) {
118 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
121 f
= load_hash
[locate
].next
;
122 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
123 while (f
&& ((i
= strcmp(f
->cg
, cg
)) != 0))
128 static void insert_node(struct load_node
**n
, int locate
)
132 pthread_mutex_lock(&load_hash
[locate
].lock
);
133 pthread_rwlock_wrlock(&load_hash
[locate
].rilock
);
134 f
= load_hash
[locate
].next
;
135 load_hash
[locate
].next
= *n
;
137 (*n
)->pre
= &(load_hash
[locate
].next
);
139 f
->pre
= &((*n
)->next
);
141 pthread_mutex_unlock(&load_hash
[locate
].lock
);
142 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
145 int calc_hash(const char *name
)
147 unsigned int hash
= 0;
150 /* ELFHash algorithm. */
152 hash
= (hash
<< 4) + *name
++;
153 x
= hash
& 0xf0000000;
159 return (hash
& 0x7fffffff);
162 int proc_loadavg_read(char *buf
, size_t size
, off_t offset
,
163 struct fuse_file_info
*fi
)
165 __do_free
char *cg
= NULL
;
166 struct fuse_context
*fc
= fuse_get_context();
167 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
169 ssize_t total_len
= 0;
178 if (offset
> d
->size
)
184 left
= d
->size
- offset
;
185 total_len
= left
> size
? size
: left
;
186 memcpy(buf
, d
->buf
+ offset
, total_len
);
191 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
193 initpid
= lookup_initpid_in_store(fc
->pid
);
194 if (initpid
<= 1 || is_shared_pidns(initpid
))
197 cg
= get_pid_cgroup(initpid
, "cpu");
199 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
201 prune_init_slice(cg
);
202 hash
= calc_hash(cg
) % LOAD_SIZE
;
203 n
= locate_node(cg
, hash
);
207 cfd
= get_cgroup_fd("cpu");
210 * In locate_node() above, pthread_rwlock_unlock() isn't used
211 * because delete is not allowed before read has ended.
213 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
214 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
217 n
= must_realloc(NULL
, sizeof(struct load_node
));
218 n
->cg
= move_ptr(cg
);
224 n
->last_pid
= initpid
;
226 insert_node(&n
, hash
);
228 a
= n
->avenrun
[0] + (FIXED_1
/ 200);
229 b
= n
->avenrun
[1] + (FIXED_1
/ 200);
230 c
= n
->avenrun
[2] + (FIXED_1
/ 200);
231 total_len
= snprintf(d
->buf
, d
->buflen
,
247 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
248 if (total_len
< 0 || total_len
>= d
->buflen
)
249 return log_error(0, "Failed to write to cache");
251 d
->size
= (int)total_len
;
254 if (total_len
> size
)
257 memcpy(buf
, d
->buf
, total_len
);
262 * Find the process pids from a cgroup path.
263 * eg: from /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find the process pids.
264 * @pid_buf : buffer that receives the pids.
265 * @rel_path : the path of cgroup. eg: /docker/containerid or /docker/containerid/child-cgroup ...
266 * @depth : the depth of cgroup in container.
267 * @sum : the number of pids stored so far; the updated count is returned.
268 * @cfd : the file descriptor of the mounted cgroup. eg: /sys/fs/cgroup/cpu
270 static int calc_pid(char ***pid_buf
, const char *rel_path
, int depth
, int sum
, int cfd
)
272 __do_free
char *line
= NULL
, *path
= NULL
;
273 __do_free
void *fdopen_cache
= NULL
;
274 __do_close
int fd
= -EBADF
;
275 __do_fclose
FILE *f
= NULL
;
276 __do_closedir
DIR *dir
= NULL
;
281 fd
= openat(cfd
, rel_path
, O_RDONLY
| O_CLOEXEC
);
288 /* Transfer ownership to fdopendir(). */
291 while (((file
= readdir(dir
)) != NULL
) && depth
> 0) {
292 if (strcmp(file
->d_name
, ".") == 0)
295 if (strcmp(file
->d_name
, "..") == 0)
298 if (file
->d_type
== DT_DIR
) {
299 __do_free
char *path_next
= NULL
;
300 path_next
= must_make_path(rel_path
, "/", file
->d_name
, NULL
);
302 sum
= calc_pid(pid_buf
, path_next
, pd
, sum
, cfd
);
306 path
= must_make_path(rel_path
, "/cgroup.procs", NULL
);
307 fd
= openat(cfd
, path
, O_RDONLY
| O_CLOEXEC
);
311 f
= fdopen_cached(fd
, "re", &fdopen_cache
);
315 while (getline(&line
, &linelen
, f
) != -1) {
316 __do_free
char *task_pid
= NULL
;
319 task_pid
= strdup(line
);
323 pid
= realloc(*pid_buf
, sizeof(char *) * (sum
+ 1));
327 *(*pid_buf
+ sum
) = move_ptr(task_pid
);
335 * calc_load calculates the load according to the following formula:
336 * load1 = load0 * exp + active * (1 - exp)
338 * @load1: the new loadavg.
339 * @load0: the former loadavg.
340 * @active: the total number of running pids at this moment.
341 * @exp: the fixed-point decay factor (EXP_1, EXP_5 or EXP_15) defined at the top of the file.
343 static uint64_t calc_load(uint64_t load
, uint64_t exp
, uint64_t active
)
347 active
= active
> 0 ? active
* FIXED_1
: 0;
348 newload
= load
* exp
+ active
* (FIXED_1
- exp
);
350 newload
+= FIXED_1
- 1;
352 return newload
/ FIXED_1
;
356 * Returns 0 if container p->cg is closed.
357 * Returns -1 if an error occurred during refresh.
358 * A positive return value is the total number of pids.
360 static int refresh_load(struct load_node
*p
, const char *path
)
363 char proc_path
[STRLITERALLEN("/proc//task//status") +
364 2 * INTTYPE_TO_STRLEN(pid_t
) + 1];
365 int i
, ret
, run_pid
= 0, total_pid
= 0, last_pid
= 0;
370 idbuf
= must_realloc(NULL
, sizeof(char **));
372 sum
= calc_pid(&idbuf
, path
, DEPTH_DIR
, 0, p
->cfd
);
376 for (i
= 0; i
< sum
; i
++) {
377 __do_closedir
DIR *dp
= NULL
;
379 length
= strlen(idbuf
[i
]) - 1;
380 idbuf
[i
][length
] = '\0';
382 ret
= snprintf(proc_path
, sizeof(proc_path
), "/proc/%s/task", idbuf
[i
]);
383 if (ret
< 0 || (size_t)ret
> sizeof(proc_path
)) {
386 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
390 dp
= opendir(proc_path
);
392 lxcfs_error("Failed to open \"%s\"", proc_path
);
396 while ((file
= readdir(dp
)) != NULL
) {
397 __do_free
char *line
= NULL
;
398 __do_fclose
FILE *f
= NULL
;
400 if (strcmp(file
->d_name
, ".") == 0)
403 if (strcmp(file
->d_name
, "..") == 0)
408 /* We make the biggest pid become last_pid. */
409 ret
= atof(file
->d_name
);
410 last_pid
= (ret
> last_pid
) ? ret
: last_pid
;
412 ret
= snprintf(proc_path
, sizeof(proc_path
),
413 "/proc/%s/task/%s/status", idbuf
[i
], file
->d_name
);
414 if (ret
< 0 || (size_t)ret
> sizeof(proc_path
)) {
417 lxcfs_error("%s\n", "snprintf() failed in refresh_load.");
421 f
= fopen(proc_path
, "re");
425 while (getline(&line
, &linelen
, f
) != -1)
426 if ((line
[0] == 'S') && (line
[1] == 't'))
429 if ((line
[7] == 'R') || (line
[7] == 'D'))
434 /* Calculate the loadavg. */
435 p
->avenrun
[0] = calc_load(p
->avenrun
[0], EXP_1
, run_pid
);
436 p
->avenrun
[1] = calc_load(p
->avenrun
[1], EXP_5
, run_pid
);
437 p
->avenrun
[2] = calc_load(p
->avenrun
[2], EXP_15
, run_pid
);
438 p
->run_pid
= run_pid
;
439 p
->total_pid
= total_pid
;
440 p
->last_pid
= last_pid
;
450 /* Delete the load_node n and return the next node of it. */
451 static struct load_node
*del_node(struct load_node
*n
, int locate
)
455 pthread_rwlock_wrlock(&load_hash
[locate
].rdlock
);
456 if (n
->next
== NULL
) {
460 n
->next
->pre
= n
->pre
;
465 pthread_rwlock_unlock(&load_hash
[locate
].rdlock
);
470 * Traverse the hash table and update it.
472 static void *load_begin(void *arg
)
477 clock_t time1
, time2
;
480 if (loadavg_stop
== 1)
484 for (int i
= 0; i
< LOAD_SIZE
; i
++) {
485 pthread_mutex_lock(&load_hash
[i
].lock
);
486 if (load_hash
[i
].next
== NULL
) {
487 pthread_mutex_unlock(&load_hash
[i
].lock
);
491 f
= load_hash
[i
].next
;
494 __do_free
char *path
= NULL
;
496 path
= must_make_path_relative(f
->cg
, NULL
);
498 sum
= refresh_load(f
, path
);
504 /* load_hash[i].lock locks only on the first node.*/
505 if (first_node
== 1) {
507 pthread_mutex_unlock(&load_hash
[i
].lock
);
512 if (loadavg_stop
== 1)
516 usleep(FLUSH_TIME
* 1000000 -
517 (int)((time2
- time1
) * 1000000 / CLOCKS_PER_SEC
));
522 * init_load initializes the hash table.
523 * Returns 0 on success, -1 on failure.
525 static int init_load(void)
530 for (i
= 0; i
< LOAD_SIZE
; i
++) {
531 load_hash
[i
].next
= NULL
;
532 ret
= pthread_mutex_init(&load_hash
[i
].lock
, NULL
);
534 lxcfs_error("Failed to initialize lock");
538 ret
= pthread_rwlock_init(&load_hash
[i
].rdlock
, NULL
);
540 lxcfs_error("Failed to initialize rdlock");
544 ret
= pthread_rwlock_init(&load_hash
[i
].rilock
, NULL
);
546 lxcfs_error("Failed to initialize rilock");
554 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
556 pthread_mutex_destroy(&load_hash
[i
].lock
);
560 pthread_mutex_destroy(&load_hash
[i
].lock
);
561 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
562 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
568 static void load_free(void)
570 struct load_node
*f
, *p
;
572 for (int i
= 0; i
< LOAD_SIZE
; i
++) {
573 pthread_mutex_lock(&load_hash
[i
].lock
);
574 pthread_rwlock_wrlock(&load_hash
[i
].rilock
);
575 pthread_rwlock_wrlock(&load_hash
[i
].rdlock
);
576 if (load_hash
[i
].next
== NULL
) {
577 pthread_mutex_unlock(&load_hash
[i
].lock
);
578 pthread_mutex_destroy(&load_hash
[i
].lock
);
579 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
580 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
581 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
582 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
586 for (f
= load_hash
[i
].next
; f
;) {
593 pthread_mutex_unlock(&load_hash
[i
].lock
);
594 pthread_mutex_destroy(&load_hash
[i
].lock
);
595 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
596 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
597 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
598 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
602 /* Return a positive number on success, return 0 on failure.*/
603 pthread_t
load_daemon(int load_use
)
610 return log_error(0, "Initialize hash_table fails in load_daemon!");
612 ret
= pthread_create(&pid
, NULL
, load_begin
, NULL
);
615 return log_error(0, "Create pthread fails in load_daemon!");
618 /* use loadavg, here loadavg = 1*/
623 /* Returns 0 on success. */
624 int stop_load_daemon(pthread_t pid
)
628 /* Signal the thread to gracefully stop */
631 s
= pthread_join(pid
, NULL
); /* Make sure sub thread has been canceled. */
633 return log_error(-1, "stop_load_daemon error: failed to join");