]>
git.proxmox.com Git - mirror_lxcfs.git/blob - src/proc_loadavg.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
7 #ifndef FUSE_USE_VERSION
8 #define FUSE_USE_VERSION 26
11 #define _FILE_OFFSET_BITS 64
13 #define __STDC_FORMAT_MACROS
31 #include <linux/magic.h>
32 #include <linux/sched.h>
33 #include <sys/epoll.h>
35 #include <sys/mount.h>
36 #include <sys/param.h>
37 #include <sys/socket.h>
38 #include <sys/syscall.h>
39 #include <sys/sysinfo.h>
44 #include "cgroup_fuse.h"
45 #include "cgroups/cgroup.h"
46 #include "cgroups/cgroup_utils.h"
47 #include "memory_utils.h"
51 * This parameter is used for proc_loadavg_read().
52 * 1 means use loadavg, 0 means do not use it.
54 static int loadavg
= 0;
56 /* The function of hash table.*/
57 #define LOAD_SIZE 100 /*the size of hash_table */
58 #define FLUSH_TIME 5 /*the flush rate */
59 #define DEPTH_DIR 3 /*the depth of per cgroup */
60 /* The function of calculate loadavg .*/
61 #define FSHIFT 11 /* nr of bits of precision */
62 #define FIXED_1 (1<<FSHIFT) /* 1.0 as fixed-point */
63 #define EXP_1 1884 /* 1/exp(5sec/1min) as fixed-point */
64 #define EXP_5 2014 /* 1/exp(5sec/5min) */
65 #define EXP_15 2037 /* 1/exp(5sec/15min) */
66 #define LOAD_INT(x) ((x) >> FSHIFT)
67 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
68 static volatile sig_atomic_t loadavg_stop
= 0;
72 unsigned long avenrun
[3]; /* Load averages */
74 unsigned int total_pid
;
75 unsigned int last_pid
;
76 int cfd
; /* The file descriptor of the mounted cgroup */
77 struct load_node
*next
;
78 struct load_node
**pre
;
83 * The lock is about insert load_node and refresh load_node. To the first
84 * load_node of each hash bucket, insert and refresh in this hash bucket is
89 * The rdlock is about read loadavg and delete load_node. To each hash
90 * bucket, read and delete are mutually exclusive. But at the same time, we
91 * allow concurrent read operations. This rdlock is at list level.
93 pthread_rwlock_t rdlock
;
95 * The rilock is about read loadavg and insert load_node. To the first
96 * load_node of each hash bucket, read and insert are mutually exclusive.
97 * But at the same time, we allow concurrent read operations.
99 pthread_rwlock_t rilock
;
100 struct load_node
*next
;
103 static struct load_head load_hash
[LOAD_SIZE
]; /* hash table */
106 * locate_node() finds special node. Not return NULL means success.
107 * It should be noted that rdlock isn't unlocked at the end of code
108 * because this function is used to read special node. Delete is not
109 * allowed before read has ended.
110 * unlock rdlock only in proc_loadavg_read().
112 static struct load_node
*locate_node(char *cg
, int locate
)
114 struct load_node
*f
= NULL
;
117 pthread_rwlock_rdlock(&load_hash
[locate
].rilock
);
118 pthread_rwlock_rdlock(&load_hash
[locate
].rdlock
);
119 if (load_hash
[locate
].next
== NULL
) {
120 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
123 f
= load_hash
[locate
].next
;
124 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
125 while (f
&& ((i
= strcmp(f
->cg
, cg
)) != 0))
130 static void insert_node(struct load_node
**n
, int locate
)
134 pthread_mutex_lock(&load_hash
[locate
].lock
);
135 pthread_rwlock_wrlock(&load_hash
[locate
].rilock
);
136 f
= load_hash
[locate
].next
;
137 load_hash
[locate
].next
= *n
;
139 (*n
)->pre
= &(load_hash
[locate
].next
);
141 f
->pre
= &((*n
)->next
);
143 pthread_mutex_unlock(&load_hash
[locate
].lock
);
144 pthread_rwlock_unlock(&load_hash
[locate
].rilock
);
147 int calc_hash(const char *name
)
149 unsigned int hash
= 0;
152 /* ELFHash algorithm. */
154 hash
= (hash
<< 4) + *name
++;
155 x
= hash
& 0xf0000000;
161 return (hash
& 0x7fffffff);
164 int proc_loadavg_read(char *buf
, size_t size
, off_t offset
,
165 struct fuse_file_info
*fi
)
167 __do_free
char *cg
= NULL
;
168 struct fuse_context
*fc
= fuse_get_context();
169 struct file_info
*d
= INTTYPE_TO_PTR(fi
->fh
);
171 size_t total_len
= 0;
172 char *cache
= d
->buf
;
176 unsigned long a
, b
, c
;
181 if (offset
> d
->size
)
187 left
= d
->size
- offset
;
188 total_len
= left
> size
? size
: left
;
189 memcpy(buf
, cache
+ offset
, total_len
);
194 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
196 initpid
= lookup_initpid_in_store(fc
->pid
);
197 if (initpid
<= 1 || is_shared_pidns(initpid
))
200 cg
= get_pid_cgroup(initpid
, "cpu");
202 return read_file_fuse("/proc/loadavg", buf
, size
, d
);
204 prune_init_slice(cg
);
205 hash
= calc_hash(cg
) % LOAD_SIZE
;
206 n
= locate_node(cg
, hash
);
210 cfd
= get_cgroup_fd("cpu");
213 * In locate_node() above, pthread_rwlock_unlock() isn't used
214 * because delete is not allowed before read has ended.
216 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
221 n
= malloc(sizeof(struct load_node
));
225 n
->cg
= malloc(strlen(cg
)+1);
234 n
->last_pid
= initpid
;
236 insert_node(&n
, hash
);
238 a
= n
->avenrun
[0] + (FIXED_1
/200);
239 b
= n
->avenrun
[1] + (FIXED_1
/200);
240 c
= n
->avenrun
[2] + (FIXED_1
/200);
241 total_len
= snprintf(d
->buf
, d
->buflen
,
257 pthread_rwlock_unlock(&load_hash
[hash
].rdlock
);
258 if (total_len
< 0 || total_len
>= d
->buflen
)
259 return log_error(0, "Failed to write to cache");
261 d
->size
= (int)total_len
;
264 if (total_len
> size
)
267 memcpy(buf
, d
->buf
, total_len
);
272 * Find the process pid from cgroup path.
273 * eg:from /sys/fs/cgroup/cpu/docker/containerid/cgroup.procs to find the process pid.
274 * @pid_buf : put pid to pid_buf.
275 * @dpath : the path of cgroup. eg: /docker/containerid or /docker/containerid/child-cgroup ...
276 * @depth : the depth of cgroup in container.
277 * @sum : return the number of pid.
278 * @cfd : the file descriptor of the mounted cgroup. eg: /sys/fs/cgroup/cpu
280 static int calc_pid(char ***pid_buf
, char *dpath
, int depth
, int sum
, int cfd
)
282 __do_free
char *path
= NULL
;
283 __do_free
void *fdopen_cache
= NULL
;
284 __do_close_prot_errno
int fd
= -EBADF
;
285 __do_fclose
FILE *f
= NULL
;
286 __do_closedir
DIR *dir
= NULL
;
293 /* path = dpath + "/cgroup.procs" + /0 */
294 path
= malloc(strlen(dpath
) + 20);
299 fd
= openat(cfd
, path
, O_RDONLY
| O_CLOEXEC
| O_NOFOLLOW
);
303 dir
= fdopendir(move_fd(fd
));
307 while (((file
= readdir(dir
)) != NULL
) && depth
> 0) {
308 if (strcmp(file
->d_name
, ".") == 0)
311 if (strcmp(file
->d_name
, "..") == 0)
314 if (file
->d_type
== DT_DIR
) {
315 __do_free
char *path_dir
= NULL
;
317 /* path + '/' + d_name +/0 */
318 path_dir
= malloc(strlen(path
) + 2 + sizeof(file
->d_name
));
322 strcpy(path_dir
, path
);
323 strcat(path_dir
, "/");
324 strcat(path_dir
, file
->d_name
);
326 sum
= calc_pid(pid_buf
, path_dir
, pd
, sum
, cfd
);
330 strcat(path
, "/cgroup.procs");
331 fd
= openat(cfd
, path
, O_RDONLY
);
335 f
= fdopen_cached(fd
, "re", &fdopen_cache
);
339 while (getline(&line
, &linelen
, f
) != -1) {
340 pid
= realloc(*pid_buf
, sizeof(char *) * (sum
+ 1));
345 *(*pid_buf
+ sum
) = malloc(strlen(line
) + 1);
346 if (!*(*pid_buf
+ sum
))
349 strcpy(*(*pid_buf
+ sum
), line
);
357 * calc_load calculates the load according to the following formula:
358 * load1 = load0 * exp + active * (1 - exp)
360 * @load1: the new loadavg.
361 * @load0: the former loadavg.
362 * @active: the total number of running pid at this moment.
363 * @exp: the fixed-point defined in the beginning.
365 static unsigned long calc_load(unsigned long load
, unsigned long exp
,
366 unsigned long active
)
368 unsigned long newload
;
370 active
= active
> 0 ? active
* FIXED_1
: 0;
371 newload
= load
* exp
+ active
* (FIXED_1
- exp
);
373 newload
+= FIXED_1
- 1;
375 return newload
/ FIXED_1
;
379 * Return 0 means that container p->cg is closed.
380 * Return -1 means that error occurred in refresh.
381 * Positive num equals the total number of pid.
383 static int refresh_load(struct load_node
*p
, char *path
)
385 __do_free
char *line
= NULL
;
388 int i
, ret
, run_pid
= 0, total_pid
= 0, last_pid
= 0;
393 idbuf
= malloc(sizeof(char *));
397 sum
= calc_pid(&idbuf
, path
, DEPTH_DIR
, 0, p
->cfd
);
402 for (i
= 0; i
< sum
; i
++) {
403 __do_closedir
DIR *dp
= NULL
;
406 length
= strlen(idbuf
[i
]) - 1;
407 idbuf
[i
][length
] = '\0';
408 ret
= snprintf(proc_path
, 256, "/proc/%s/task", idbuf
[i
]);
409 if (ret
< 0 || ret
> 255) {
412 log_error(goto err_out
, "snprintf() failed in refresh_load");
415 dp
= opendir(proc_path
);
417 log_error(continue, "Open proc_path failed in refresh_load");
419 while ((file
= readdir(dp
)) != NULL
) {
420 __do_free
void *fopen_cache
= NULL
;
421 __do_fclose
FILE *f
= NULL
;
423 if (strncmp(file
->d_name
, ".", 1) == 0)
426 if (strncmp(file
->d_name
, "..", 1) == 0)
431 /* We make the biggest pid become last_pid.*/
432 ret
= atof(file
->d_name
);
433 last_pid
= (ret
> last_pid
) ? ret
: last_pid
;
435 ret
= snprintf(proc_path
, 256, "/proc/%s/task/%s/status",
436 idbuf
[i
], file
->d_name
);
437 if (ret
< 0 || ret
> 255) {
440 log_error(goto err_out
, "snprintf() failed in refresh_load");
443 f
= fopen_cached(proc_path
, "re", &fopen_cache
);
445 while (getline(&line
, &linelen
, f
) != -1) {
447 if ((strncmp(line
, "State", 5) == 0) &&
448 (strncmp(line
, "State R", 7) == 0 ||
449 strncmp(line
, "State D", 7) == 0))
456 /*Calculate the loadavg.*/
457 p
->avenrun
[0] = calc_load(p
->avenrun
[0], EXP_1
, run_pid
);
458 p
->avenrun
[1] = calc_load(p
->avenrun
[1], EXP_5
, run_pid
);
459 p
->avenrun
[2] = calc_load(p
->avenrun
[2], EXP_15
, run_pid
);
460 p
->run_pid
= run_pid
;
461 p
->total_pid
= total_pid
;
462 p
->last_pid
= last_pid
;
472 /* Delete the load_node n and return the next node of it. */
473 static struct load_node
*del_node(struct load_node
*n
, int locate
)
477 pthread_rwlock_wrlock(&load_hash
[locate
].rdlock
);
478 if (n
->next
== NULL
) {
482 n
->next
->pre
= n
->pre
;
487 pthread_rwlock_unlock(&load_hash
[locate
].rdlock
);
492 * Traverse the hash table and update it.
494 static void *load_begin(void *arg
)
497 int i
, sum
, length
, ret
;
500 clock_t time1
, time2
;
503 if (loadavg_stop
== 1)
507 for (i
= 0; i
< LOAD_SIZE
; i
++) {
508 pthread_mutex_lock(&load_hash
[i
].lock
);
509 if (load_hash
[i
].next
== NULL
) {
510 pthread_mutex_unlock(&load_hash
[i
].lock
);
513 f
= load_hash
[i
].next
;
516 __do_free
char *path
= NULL
;
518 length
= strlen(f
->cg
) + 2;
519 /* strlen(f->cg) + '.' or '' + \0 */
520 path
= malloc(length
);
524 ret
= snprintf(path
, length
, "%s%s", dot_or_empty(f
->cg
), f
->cg
);
525 /* Ignore the node if snprintf fails.*/
526 if (ret
< 0 || ret
> length
- 1)
527 log_error(goto out
, "Refresh node %s failed for snprintf()", f
->cg
);
529 sum
= refresh_load(f
, path
);
534 /* load_hash[i].lock locks only on the first node.*/
535 if (first_node
== 1) {
537 pthread_mutex_unlock(&load_hash
[i
].lock
);
542 if (loadavg_stop
== 1)
546 usleep(FLUSH_TIME
* 1000000 - (int)((time2
- time1
) * 1000000 / CLOCKS_PER_SEC
));
551 * init_load initialize the hash table.
552 * Return 0 on success, return -1 on failure.
554 static int init_load(void)
559 for (i
= 0; i
< LOAD_SIZE
; i
++) {
560 load_hash
[i
].next
= NULL
;
561 ret
= pthread_mutex_init(&load_hash
[i
].lock
, NULL
);
563 lxcfs_error("Failed to initialize lock");
567 ret
= pthread_rwlock_init(&load_hash
[i
].rdlock
, NULL
);
569 lxcfs_error("Failed to initialize rdlock");
573 ret
= pthread_rwlock_init(&load_hash
[i
].rilock
, NULL
);
575 lxcfs_error("Failed to initialize rilock");
583 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
585 pthread_mutex_destroy(&load_hash
[i
].lock
);
589 pthread_mutex_destroy(&load_hash
[i
].lock
);
590 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
591 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
597 static void load_free(void)
599 struct load_node
*f
, *p
;
601 for (int i
= 0; i
< LOAD_SIZE
; i
++) {
602 pthread_mutex_lock(&load_hash
[i
].lock
);
603 pthread_rwlock_wrlock(&load_hash
[i
].rilock
);
604 pthread_rwlock_wrlock(&load_hash
[i
].rdlock
);
605 if (load_hash
[i
].next
== NULL
) {
606 pthread_mutex_unlock(&load_hash
[i
].lock
);
607 pthread_mutex_destroy(&load_hash
[i
].lock
);
608 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
609 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
610 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
611 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
615 for (f
= load_hash
[i
].next
; f
;) {
622 pthread_mutex_unlock(&load_hash
[i
].lock
);
623 pthread_mutex_destroy(&load_hash
[i
].lock
);
624 pthread_rwlock_unlock(&load_hash
[i
].rilock
);
625 pthread_rwlock_destroy(&load_hash
[i
].rilock
);
626 pthread_rwlock_unlock(&load_hash
[i
].rdlock
);
627 pthread_rwlock_destroy(&load_hash
[i
].rdlock
);
631 /* Return a positive number on success, return 0 on failure.*/
632 pthread_t
load_daemon(int load_use
)
639 return log_error(0, "Initialize hash_table fails in load_daemon!");
641 ret
= pthread_create(&pid
, NULL
, load_begin
, NULL
);
644 return log_error(0, "Create pthread fails in load_daemon!");
647 /* use loadavg, here loadavg = 1*/
652 /* Returns 0 on success. */
653 int stop_load_daemon(pthread_t pid
)
657 /* Signal the thread to gracefully stop */
660 s
= pthread_join(pid
, NULL
); /* Make sure sub thread has been canceled. */
662 return log_error(-1, "stop_load_daemon error: failed to join");