/* src/proc_loadavg.c — lxcfs (extracted from the git.proxmox.com mirror_lxcfs gitweb view); HTML scrape artifacts removed. */
1 /* SPDX-License-Identifier: LGPL-2.1+ */
7 #ifndef FUSE_USE_VERSION
8 #define FUSE_USE_VERSION 26
11 #define _FILE_OFFSET_BITS 64
13 #define __STDC_FORMAT_MACROS
31 #include <linux/magic.h>
32 #include <linux/sched.h>
33 #include <sys/epoll.h>
35 #include <sys/mount.h>
36 #include <sys/param.h>
37 #include <sys/socket.h>
38 #include <sys/syscall.h>
39 #include <sys/sysinfo.h>
44 #include "cgroup_fuse.h"
45 #include "cgroups/cgroup.h"
46 #include "cgroups/cgroup_utils.h"
47 #include "memory_utils.h"
51 * This parameter is used for proc_loadavg_read().
52 * 1 means use loadavg, 0 means not use.
54 static int loadavg
= 0;
56 /* The function of hash table.*/
57 #define LOAD_SIZE 100 /*the size of hash_table */
58 #define FLUSH_TIME 5 /*the flush rate */
59 #define DEPTH_DIR 3 /*the depth of per cgroup */
60 /* The function of calculate loadavg .*/
61 #define FSHIFT 11 /* nr of bits of precision */
62 #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
63 #define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
64 #define EXP_5 2014 /* 1/exp(5sec/5min) */
65 #define EXP_15 2037 /* 1/exp(5sec/15min) */
66 #define LOAD_INT(x) ((x) >> FSHIFT)
67 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
68 static volatile sig_atomic_t loadavg_stop
= 0;
76 unsigned int total_pid
;
77 unsigned int last_pid
;
78 /* The file descriptor of the mounted cgroup */
80 struct load_node
*next
;
81 struct load_node
**pre
;
86 * The lock is about insert load_node and refresh load_node.To the first
87 * load_node of each hash bucket, insert and refresh in this hash bucket is
92 * The rdlock is about read loadavg and delete load_node.To each hash
93 * bucket, read and delete is mutually exclusive. But at the same time, we
94 * allow paratactic read operation. This rdlock is at list level.
96 pthread_rwlock_t rdlock
;
98 * The rilock is about read loadavg and insert load_node.To the first
99 * load_node of each hash bucket, read and insert is mutually exclusive.
100 * But at the same time, we allow paratactic read operation.
102 pthread_rwlock_t rilock
;
103 struct load_node
*next
;
106 static struct load_head load_hash
[LOAD_SIZE
]; /* hash table */
109 * locate_node() finds special node. Not return NULL means success.
110 * It should be noted that rdlock isn't unlocked at the end of code
111 * because this function is used to read special node. Delete is not
112 * allowed before read has ended.
113 * unlock rdlock only in proc_loadavg_read().
115 static struct load_node
*locate_node(char *cg
, int locate
)
117 struct load_node
*f
= NULL
;
120 pthread_rwlock_rdlock(&load_hash
[locate
].rilock
);
121 pthread_rwlock_rdlock(&load_hash
[locate
].rdlock
);
122 if (load_hash
[locate
].next
== NULL
) {
123 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
126 f
= load_hash
[locate
].next
;
127 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
128 while (f
&& ((i
= strcmp(f
->cg
, cg
)) != 0))
133 static void insert_node(struct load_node
**n
, int locate
)
137 pthread_mutex_lock(&load_hash
[locate
].lock
);
138 pthread_rwlock_wrlock(&load_hash
[locate
].rilock
);
139 f
= load_hash
[locate
].next
;
140 load_hash
[locate
].next
= *n
;
142 (*n
)->pre
= &(load_hash
[locate
].next
);
144 f
->pre
= &((*n
)->next
);
146 pthread_mutex_unlock(&load_hash
[locate
].lock
);
147 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
/*
 * Hash a cgroup name with the classic ELF hash, masked to a non-negative
 * int. Callers reduce the result modulo LOAD_SIZE to pick a bucket.
 */
int calc_hash(const char *name)
{
	unsigned int hash = 0;
	unsigned int x = 0;

	/* ELFHash algorithm. */
	while (*name) {
		hash = (hash << 4) + *name++;
		x = hash & 0xf0000000;
		/* Fold the top nibble back in so long names keep mixing. */
		if (x != 0)
			hash ^= (x >> 24);
		hash &= ~x;
	}

	/* Mask the sign bit so the value is safe to use with % on an int. */
	return (hash & 0x7fffffff);
}
167 int proc_loadavg_read(char *buf
, size_t size
, off_t offset
,
168 struct fuse_file_info
*fi
)
170 __do_free
char *cg
= NULL
;
171 struct fuse_context
*fc
= fuse_get_context();
172 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
174 ssize_t total_len
= 0;
175 char *cache
= d
->buf
;
184 if (offset
> d
->size
)
190 left
= d
->size
- offset
;
191 total_len
= left
> size
? size
: left
;
192 memcpy(buf
, cache
+ offset
, total_len
);
197 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
199 initpid
= lookup_initpid_in_store(fc
->pid
);
200 if (initpid
<= 1 || is_shared_pidns(initpid
))
203 cg
= get_pid_cgroup(initpid
, "cpu");
205 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
207 prune_init_slice(cg
);
208 hash
= calc_hash(cg
) % LOAD_SIZE
;
209 n
= locate_node(cg
, hash
);
213 cfd
= get_cgroup_fd("cpu");
216 * In locate_node() above, pthread_rwlock_unlock() isn't used
217 * because delete is not allowed before read has ended.
219 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
224 n
= malloc(sizeof(struct load_node
));
228 n
->cg
= malloc(strlen(cg
)+1);
237 n
->last_pid
= initpid
;
239 insert_node(&n
, hash
);
241 a
= n
->avenrun
[0] + (FIXED_1
/200);
242 b
= n
->avenrun
[1] + (FIXED_1
/200);
243 c
= n
->avenrun
[2] + (FIXED_1
/200);
244 total_len
= snprintf(d
->buf
, d
->buflen
,
260 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
261 if (total_len
< 0 || total_len
>= d
->buflen
)
262 return log_error(0, "Failed to write to cache");
264 d
->size
= (int)total_len
;
267 if (total_len
> size
)
270 memcpy(buf
, d
->buf
, total_len
);
275 * Find the process pid from cgroup path.
276 * eg:from /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find the process pid.
277 * @pid_buf : put pid to pid_buf.
278 * @dpath : the path of cgroup. eg: /docker/containerid or /docker/containerid/child-cgroup ...
279 * @depth : the depth of cgroup in container.
280 * @sum : return the number of pid.
281 * @cfd : the file descriptor of the mounted cgroup. eg: /sys/fs/cgroup/cpu
283 static int calc_pid(char ***pid_buf
, char *dpath
, int depth
, int sum
, int cfd
)
285 __do_free
char *path
= NULL
;
286 __do_free
void *fdopen_cache
= NULL
;
287 __do_close_prot_errno
int fd
= -EBADF
;
288 __do_fclose
FILE *f
= NULL
;
289 __do_closedir
DIR *dir
= NULL
;
296 /* path = dpath + "/cgroup.procs" + /0 */
297 path
= malloc(strlen(dpath
) + 20);
302 fd
= openat(cfd
, path
, O_RDONLY
| O_CLOEXEC
| O_NOFOLLOW
);
306 dir
= fdopendir(move_fd(fd
));
310 while (((file
= readdir(dir
)) != NULL
) && depth
> 0) {
311 if (strcmp(file
->d_name
, ".") == 0)
314 if (strcmp(file
->d_name
, "..") == 0)
317 if (file
->d_type
== DT_DIR
) {
318 __do_free
char *path_dir
= NULL
;
320 /* path + '/' + d_name +/0 */
321 path_dir
= malloc(strlen(path
) + 2 + sizeof(file
->d_name
));
325 strcpy(path_dir
, path
);
326 strcat(path_dir
, "/");
327 strcat(path_dir
, file
->d_name
);
329 sum
= calc_pid(pid_buf
, path_dir
, pd
, sum
, cfd
);
333 strcat(path
, "/cgroup.procs");
334 fd
= openat(cfd
, path
, O_RDONLY
);
338 f
= fdopen_cached(fd
, "re", &fdopen_cache
);
342 while (getline(&line
, &linelen
, f
) != -1) {
343 pid
= realloc(*pid_buf
, sizeof(char *) * (sum
+ 1));
348 *(*pid_buf
+ sum
) = malloc(strlen(line
) + 1);
349 if (!*(*pid_buf
+ sum
))
352 strcpy(*(*pid_buf
+ sum
), line
);
360 * calc_load calculates the load according to the following formula:
361 * load1 = load0 * exp + active * (1 - exp)
363 * @load1: the new loadavg.
364 * @load0: the former loadavg.
365 * @active: the total number of running pid at this moment.
366 * @exp: the fixed-point defined in the beginning.
368 static uint64_t calc_load(uint64_t load
, uint64_t exp
, uint64_t active
)
372 active
= active
> 0 ? active
* FIXED_1
: 0;
373 newload
= load
* exp
+ active
* (FIXED_1
- exp
);
375 newload
+= FIXED_1
- 1;
377 return newload
/ FIXED_1
;
381 * Return 0 means that container p->cg is closed.
382 * Return -1 means that error occurred in refresh.
383 * Positive num equals the total number of pid.
385 static int refresh_load(struct load_node
*p
, char *path
)
387 __do_free
char *line
= NULL
;
390 int i
, ret
, run_pid
= 0, total_pid
= 0, last_pid
= 0;
395 idbuf
= malloc(sizeof(char *));
399 sum
= calc_pid(&idbuf
, path
, DEPTH_DIR
, 0, p
->cfd
);
404 for (i
= 0; i
< sum
; i
++) {
405 __do_closedir
DIR *dp
= NULL
;
408 length
= strlen(idbuf
[i
]) - 1;
409 idbuf
[i
][length
] = '\0';
410 ret
= snprintf(proc_path
, 256, "/proc/%s/task", idbuf
[i
]);
411 if (ret
< 0 || ret
> 255) {
414 log_error(goto err_out
, "snprintf() failed in refresh_load");
417 dp
= opendir(proc_path
);
419 log_error(continue, "Open proc_path failed in refresh_load");
421 while ((file
= readdir(dp
)) != NULL
) {
422 __do_free
void *fopen_cache
= NULL
;
423 __do_fclose
FILE *f
= NULL
;
425 if (strncmp(file
->d_name
, ".", 1) == 0)
428 if (strncmp(file
->d_name
, "..", 1) == 0)
433 /* We make the biggest pid become last_pid.*/
434 ret
= atof(file
->d_name
);
435 last_pid
= (ret
> last_pid
) ? ret
: last_pid
;
437 ret
= snprintf(proc_path
, 256, "/proc/%s/task/%s/status",
438 idbuf
[i
], file
->d_name
);
439 if (ret
< 0 || ret
> 255) {
442 log_error(goto err_out
, "snprintf() failed in refresh_load");
445 f
= fopen_cached(proc_path
, "re", &fopen_cache
);
447 while (getline(&line
, &linelen
, f
) != -1) {
449 if ((strncmp(line
, "State", 5) == 0) &&
450 (strncmp(line
, "State R", 7) == 0 ||
451 strncmp(line
, "State D", 7) == 0))
458 /*Calculate the loadavg.*/
459 p
->avenrun
[0] = calc_load(p
->avenrun
[0], EXP_1
, run_pid
);
460 p
->avenrun
[1] = calc_load(p
->avenrun
[1], EXP_5
, run_pid
);
461 p
->avenrun
[2] = calc_load(p
->avenrun
[2], EXP_15
, run_pid
);
462 p
->run_pid
= run_pid
;
463 p
->total_pid
= total_pid
;
464 p
->last_pid
= last_pid
;
474 /* Delete the load_node n and return the next node of it. */
475 static struct load_node
*del_node(struct load_node
*n
, int locate
)
479 pthread_rwlock_wrlock(&load_hash
[locate
].rdlock
);
480 if (n
->next
== NULL
) {
484 n
->next
->pre
= n
->pre
;
489 pthread_rwlock_unlock(&load_hash
[locate
].rdlock
);
494 * Traverse the hash table and update it.
496 static void *load_begin(void *arg
)
499 int i
, sum
, length
, ret
;
502 clock_t time1
, time2
;
505 if (loadavg_stop
== 1)
509 for (i
= 0; i
< LOAD_SIZE
; i
++) {
510 pthread_mutex_lock(&load_hash
[i
].lock
);
511 if (load_hash
[i
].next
== NULL
) {
512 pthread_mutex_unlock(&load_hash
[i
].lock
);
515 f
= load_hash
[i
].next
;
518 __do_free
char *path
= NULL
;
520 length
= strlen(f
->cg
) + 2;
521 /* strlen(f->cg) + '.' or '' + \0 */
522 path
= malloc(length
);
526 ret
= snprintf(path
, length
, "%s%s", dot_or_empty(f
->cg
), f
->cg
);
527 /* Ignore the node if snprintf fails.*/
528 if (ret
< 0 || ret
> length
- 1)
529 log_error(goto out
, "Refresh node %s failed for snprintf()", f
->cg
);
531 sum
= refresh_load(f
, path
);
536 /* load_hash[i].lock locks only on the first node.*/
537 if (first_node
== 1) {
539 pthread_mutex_unlock(&load_hash
[i
].lock
);
544 if (loadavg_stop
== 1)
548 usleep(FLUSH_TIME
* 1000000 - (int)((time2
- time1
) * 1000000 / CLOCKS_PER_SEC
));
553 * init_load initialize the hash table.
554 * Return 0 on success, return -1 on failure.
556 static int init_load(void)
561 for (i
= 0; i
< LOAD_SIZE
; i
++) {
562 load_hash
[i
].next
= NULL
;
563 ret
= pthread_mutex_init(&load_hash
[i
].lock
, NULL
);
565 lxcfs_error("Failed to initialize lock");
569 ret
= pthread_rwlock_init(&load_hash
[i
].rdlock
, NULL
);
571 lxcfs_error("Failed to initialize rdlock");
575 ret
= pthread_rwlock_init(&load_hash
[i
].rilock
, NULL
);
577 lxcfs_error("Failed to initialize rilock");
585 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
587 pthread_mutex_destroy(&load_hash
[i
].lock
);
591 pthread_mutex_destroy(&load_hash
[i
].lock
);
592 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
593 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
599 static void load_free(void)
601 struct load_node
*f
, *p
;
603 for (int i
= 0; i
< LOAD_SIZE
; i
++) {
604 pthread_mutex_lock(&load_hash
[i
].lock
);
605 pthread_rwlock_wrlock(&load_hash
[i
].rilock
);
606 pthread_rwlock_wrlock(&load_hash
[i
].rdlock
);
607 if (load_hash
[i
].next
== NULL
) {
608 pthread_mutex_unlock(&load_hash
[i
].lock
);
609 pthread_mutex_destroy(&load_hash
[i
].lock
);
610 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
611 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
612 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
613 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
617 for (f
= load_hash
[i
].next
; f
;) {
624 pthread_mutex_unlock(&load_hash
[i
].lock
);
625 pthread_mutex_destroy(&load_hash
[i
].lock
);
626 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
627 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
628 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
629 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
633 /* Return a positive number on success, return 0 on failure.*/
634 pthread_t
load_daemon(int load_use
)
641 return log_error(0, "Initialize hash_table fails in load_daemon!");
643 ret
= pthread_create(&pid
, NULL
, load_begin
, NULL
);
646 return log_error(0, "Create pthread fails in load_daemon!");
649 /* use loadavg, here loadavg = 1*/
654 /* Returns 0 on success. */
655 int stop_load_daemon(pthread_t pid
)
659 /* Signal the thread to gracefully stop */
662 s
= pthread_join(pid
, NULL
); /* Make sure sub thread has been canceled. */
664 return log_error(-1, "stop_load_daemon error: failed to join");