LXC_TYPE_PROC_STAT,
LXC_TYPE_PROC_DISKSTATS,
LXC_TYPE_PROC_SWAPS,
+ LXC_TYPE_PROC_LOADAVG,
};
struct file_info {
int cached;
};
+/* Hash table support: bucket count and the string hash used to pick a bucket. */
+#define LOAD_SIZE 100 /* Number of buckets in the hash table. */
+/*
+ * calc_hash - map a string to a bucket index of the hash table.
+ * @name: NUL-terminated string to hash; must not be NULL.
+ *
+ * Implements the ELF hash algorithm. The bytes of @name are read as
+ * unsigned char so that bytes >= 0x80 are not sign-extended on platforms
+ * where plain char is signed; sign extension would set all upper bits of
+ * the running hash at once and defeat the top-nibble folding below.
+ *
+ * Returns an index in the range [0, LOAD_SIZE).
+ */
+static int calc_hash(char *name)
+{
+	const unsigned char *p = (const unsigned char *)name;
+	unsigned int hash = 0;
+	unsigned int x = 0;
+
+	while (*p) {
+		hash = (hash << 4) + *p++;
+		/* Fold the top nibble back into the low bits, then clear it. */
+		x = hash & 0xf0000000;
+		if (x != 0)
+			hash ^= (x >> 24);
+		hash &= ~x;
+	}
+	return ((hash & 0x7fffffff) % LOAD_SIZE);
+}
+
+struct load_node { /* One entry in the linked list of a hash bucket. */
+ char *cg; /* cgroup string used as the lookup key: compared with strcmp()
+            * in locate_node() and released with free() in del_node() */
+ unsigned long avenrun[3]; /* Load averages */
+ unsigned int run_pid; /* NOTE(review): presumably the number of running tasks - confirm */
+ unsigned int total_pid; /* NOTE(review): presumably the total number of tasks - confirm */
+ unsigned int last_pid; /* NOTE(review): presumably the most recently seen pid - confirm */
+ int cfd; /* The file descriptor of the mounted cgroup */
+ struct load_node *next; /* following node in the bucket, NULL at the tail */
+ struct load_node **pre; /* address of the pointer that refers to this node:
+                          * the bucket head's next or the previous node's next */
+};
+
+struct load_head { /* One hash bucket: its locks and the head of its node list. */
+ /*
+ * Serializes insertion of a load_node with refreshing of load_nodes.
+ * For the first load_node of a hash bucket, insert and refresh are
+ * mutually exclusive.
+ */
+ pthread_mutex_t lock;
+ /*
+ * Guards reading loadavg against deletion of a load_node. Within a hash
+ * bucket, read and delete are mutually exclusive, while any number of
+ * reads may run concurrently. This lock covers the whole list.
+ */
+ pthread_rwlock_t rdlock;
+ /*
+ * Guards reading loadavg against insertion of a load_node. For the first
+ * load_node of a hash bucket, read and insert are mutually exclusive,
+ * while any number of reads may run concurrently.
+ */
+ pthread_rwlock_t rilock;
+ struct load_node *next; /* first load_node of the bucket, NULL when empty */
+};
+
+static struct load_head load_hash[LOAD_SIZE]; /* hash table */
+/*
+ * init_load() prepares every bucket of load_hash: the list head is cleared
+ * and the bucket's mutex and both rwlocks are initialized.
+ * If any initialization fails, every lock set up so far is destroyed again.
+ * Returns 0 on success and -1 on failure.
+ */
+static int init_load(void)
+{
+	int idx;
+
+	for (idx = 0; idx < LOAD_SIZE; idx++) {
+		struct load_head *head = &load_hash[idx];
+
+		head->next = NULL;
+
+		if (pthread_mutex_init(&head->lock, NULL) != 0) {
+			lxcfs_error("%s\n", "Failed to initialize lock");
+			goto undo_previous;
+		}
+
+		if (pthread_rwlock_init(&head->rdlock, NULL) != 0) {
+			lxcfs_error("%s\n", "Failed to initialize rdlock");
+			goto undo_lock;
+		}
+
+		if (pthread_rwlock_init(&head->rilock, NULL) != 0) {
+			lxcfs_error("%s\n", "Failed to initialize rilock");
+			goto undo_rdlock;
+		}
+	}
+
+	return 0;
+
+	/* Tear down the partially initialized bucket first ... */
+undo_rdlock:
+	pthread_rwlock_destroy(&load_hash[idx].rdlock);
+undo_lock:
+	pthread_mutex_destroy(&load_hash[idx].lock);
+	/* ... then every fully initialized bucket before it, newest first. */
+undo_previous:
+	while (idx-- > 0) {
+		pthread_mutex_destroy(&load_hash[idx].lock);
+		pthread_rwlock_destroy(&load_hash[idx].rdlock);
+		pthread_rwlock_destroy(&load_hash[idx].rilock);
+	}
+
+	return -1;
+}
+
+/*
+ * insert_node() pushes the node *n onto the front of bucket 'locate'.
+ * The bucket's mutex and its rilock (as writer) are held while the list
+ * head and the neighbouring back links are rewired.
+ */
+static void insert_node(struct load_node **n, int locate)
+{
+	struct load_head *bucket = &load_hash[locate];
+	struct load_node *node;
+	struct load_node *old_first;
+
+	pthread_mutex_lock(&bucket->lock);
+	pthread_rwlock_wrlock(&bucket->rilock);
+
+	node = *n;
+	old_first = bucket->next;
+
+	/* The new node becomes the head; its back link is the head pointer. */
+	bucket->next = node;
+	node->pre = &bucket->next;
+
+	/* The former head, if any, now hangs off the new node. */
+	if (old_first)
+		old_first->pre = &node->next;
+	node->next = old_first;
+
+	pthread_mutex_unlock(&bucket->lock);
+	pthread_rwlock_unlock(&bucket->rilock);
+}
+/*
+ * locate_node() looks up the node whose cg string equals 'cg' in bucket
+ * 'locate'. Returns the node, or NULL if the bucket holds no such node.
+ *
+ * The rilock is held as reader only while the bucket head is sampled.
+ * The rdlock is taken as reader and is NOT released here, whether or not a
+ * node was found: the node is about to be read, and deletion must not
+ * happen before that read has ended. The rdlock is unlocked only in
+ * proc_loadavg_read().
+ */
+static struct load_node *locate_node(char *cg, int locate)
+{
+	struct load_node *f;
+
+	pthread_rwlock_rdlock(&load_hash[locate].rilock);
+	pthread_rwlock_rdlock(&load_hash[locate].rdlock);
+	f = load_hash[locate].next;
+	pthread_rwlock_unlock(&load_hash[locate].rilock);
+
+	/* An empty bucket leaves f NULL, so the walk simply does not start. */
+	while (f && strcmp(f->cg, cg) != 0)
+		f = f->next;
+	return f;
+}
+/*
+ * del_node() unlinks node n from bucket 'locate' and frees it together with
+ * its cg string, all while holding the bucket's rdlock as writer.
+ * Returns the node that followed n, or NULL if n was the last one.
+ */
+static struct load_node *del_node(struct load_node *n, int locate)
+{
+	struct load_node *successor;
+
+	pthread_rwlock_wrlock(&load_hash[locate].rdlock);
+
+	/* Whatever pointed at n now points at its successor (possibly NULL). */
+	successor = n->next;
+	*(n->pre) = successor;
+	if (successor)
+		successor->pre = n->pre;
+
+	free(n->cg);
+	free(n);
+
+	pthread_rwlock_unlock(&load_hash[locate].rdlock);
+	return successor;
+}
+
/* Reserve buffer size to account for file size changes. */
#define BUF_RESERVE_SIZE 512
* another namespace using the *at() family of functions
* {openat(), fchownat(), ...}. */
static int *fd_hierarchies;
+static int cgroup_mount_ns_fd = -1;
static void unlock_mutex(pthread_mutex_t *l)
{
{
int i;
+ fprintf(stderr, "mount namespace: %d\n", cgroup_mount_ns_fd);
fprintf(stderr, "hierarchies:\n");
for (i = 0; i < num_hierarchies; i++) {
if (hierarchies[i])
char *eol;
while (*memstat) {
- if (startswith(memstat, "cache")) {
- sscanf(memstat + 5, "%lu", cached);
+ if (startswith(memstat, "total_cache")) {
+ sscanf(memstat + 11, "%lu", cached);
*cached /= 1024;
- } else if (startswith(memstat, "active_anon")) {
- sscanf(memstat + 11, "%lu", active_anon);
+ } else if (startswith(memstat, "total_active_anon")) {
+ sscanf(memstat + 17, "%lu", active_anon);
*active_anon /= 1024;
- } else if (startswith(memstat, "inactive_anon")) {
- sscanf(memstat + 13, "%lu", inactive_anon);
+ } else if (startswith(memstat, "total_inactive_anon")) {
+ sscanf(memstat + 19, "%lu", inactive_anon);
*inactive_anon /= 1024;
- } else if (startswith(memstat, "active_file")) {
- sscanf(memstat + 11, "%lu", active_file);
+ } else if (startswith(memstat, "total_active_file")) {
+ sscanf(memstat + 17, "%lu", active_file);
*active_file /= 1024;
- } else if (startswith(memstat, "inactive_file")) {
- sscanf(memstat + 13, "%lu", inactive_file);
+ } else if (startswith(memstat, "total_inactive_file")) {
+ sscanf(memstat + 19, "%lu", inactive_file);
*inactive_file /= 1024;
- } else if (startswith(memstat, "unevictable")) {
- sscanf(memstat + 11, "%lu", unevictable);
+ } else if (startswith(memstat, "total_unevictable")) {
+ sscanf(memstat + 17, "%lu", unevictable);
*unevictable /= 1024;
}
eol = strchr(memstat, '\n');
snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
printme = lbuf;
} else if (startswith(line, "MemAvailable:")) {
- snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
+ snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage + cached);
printme = lbuf;
} else if (startswith(line, "SwapTotal:") && memswlimit > 0) {
sscanf(line+sizeof("SwapTotal:")-1, "%lu", &hostswtotal);
- if (hostswtotal < memswlimit - memlimit)
- memswlimit = hostswtotal + memlimit;
- snprintf(lbuf, 100, "SwapTotal: %8lu kB\n", memswlimit - memlimit);
+ if (hostswtotal < memswlimit)
+ memswlimit = hostswtotal;
+ snprintf(lbuf, 100, "SwapTotal: %8lu kB\n", memswlimit);
printme = lbuf;
} else if (startswith(line, "SwapFree:") && memswlimit > 0 && memswusage > 0) {
- unsigned long swaptotal = memswlimit - memlimit,
+ unsigned long swaptotal = memswlimit,
swapusage = memswusage - memusage,
swapfree = swapusage < swaptotal ? swaptotal - swapusage : 0;
snprintf(lbuf, 100, "SwapFree: %8lu kB\n", swapfree);
return procage;
}
-static uint64_t get_reaper_btime(pid)
-{
- int ret;
- struct sysinfo sys;
- uint64_t procstart;
- uint64_t uptime;
-
- ret = sysinfo(&sys);
- if (ret < 0) {
- lxcfs_debug("%s\n", "failed to retrieve system information");
- return 0;
- }
-
- uptime = (uint64_t)time(NULL) - (uint64_t)sys.uptime;
- procstart = get_reaper_start_time_in_sec(pid);
- return uptime + procstart;
-}
-
#define CPUALL_MAX_SIZE (BUF_RESERVE_SIZE / 2)
static int proc_stat_read(char *buf, size_t size, off_t offset,
struct fuse_file_info *fi)
#endif
if (offset){
- if (offset > d->size)
- return -EINVAL;
if (!d->cached)
return 0;
+ if (offset > d->size)
+ return -EINVAL;
int left = d->size - offset;
total_len = left > size ? size: left;
memcpy(buf, cache + offset, total_len);
if (reaperage >= busytime)
idletime = reaperage - busytime;
- total_len = snprintf(d->buf, d->size, "%"PRIu64".00 %"PRIu64".00\n", reaperage, idletime);
- if (total_len < 0 || total_len >= d->size){
+ total_len = snprintf(d->buf, d->buflen, "%"PRIu64".00 %"PRIu64".00\n", reaperage, idletime);
+ if (total_len < 0 || total_len >= d->buflen){
lxcfs_error("%s\n", "failed to write to cache");
return 0;
}
strcmp(path, "/proc/uptime") == 0 ||
strcmp(path, "/proc/stat") == 0 ||
strcmp(path, "/proc/diskstats") == 0 ||
- strcmp(path, "/proc/swaps") == 0) {
+ strcmp(path, "/proc/swaps") == 0 ||
+ strcmp(path, "/proc/loadavg") == 0) {
sb->st_size = 0;
sb->st_mode = S_IFREG | 00444;
sb->st_nlink = 1;
filler(buf, "stat", NULL, 0) != 0 ||
filler(buf, "uptime", NULL, 0) != 0 ||
filler(buf, "diskstats", NULL, 0) != 0 ||
- filler(buf, "swaps", NULL, 0) != 0)
+ filler(buf, "swaps", NULL, 0) != 0 ||
+ filler(buf, "loadavg", NULL, 0) != 0)
return -EINVAL;
return 0;
}
type = LXC_TYPE_PROC_DISKSTATS;
else if (strcmp(path, "/proc/swaps") == 0)
type = LXC_TYPE_PROC_SWAPS;
+ else if (strcmp(path, "/proc/loadavg") == 0)
+ type = LXC_TYPE_PROC_LOADAVG;
if (type == -1)
return -ENOENT;
return proc_diskstats_read(buf, size, offset, fi);
case LXC_TYPE_PROC_SWAPS:
return proc_swaps_read(buf, size, offset, fi);
+ case LXC_TYPE_PROC_LOADAVG:
+ return proc_loadavg_read(buf, size, offset, fi);
default:
return -EINVAL;
}
return true;
}
+/*
+ * preserve_mnt_ns() opens /proc/<pid>/ns/mnt read-only with O_CLOEXEC.
+ * Returns the file descriptor from open() (-1 if open() fails), or -1 if
+ * the path could not be formatted into the buffer.
+ */
+static int preserve_mnt_ns(int pid)
+{
+	/* Both literals (terminators included) plus 21 bytes for the pid. */
+	char path[sizeof("/proc/") + 21 + sizeof("/ns/mnt")];
+	int written;
+
+	written = snprintf(path, sizeof(path), "/proc/%d/ns/mnt", pid);
+	if (written < 0 || (size_t)written >= sizeof(path))
+		return -1;
+
+	return open(path, O_RDONLY | O_CLOEXEC);
+}
+
static bool cgfs_prepare_mounts(void)
{
if (!mkdir_p(BASEDIR, 0700)) {
return false;
}
+ cgroup_mount_ns_fd = preserve_mnt_ns(getpid());
+ if (cgroup_mount_ns_fd < 0) {
+ lxcfs_error("Failed to preserve mount namespace: %s.\n", strerror(errno));
+ return false;
+ }
+
if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0) < 0) {
lxcfs_error("Failed to remount / private: %s.\n", strerror(errno));
return false;
return true;
}
-static int preserve_ns(int pid)
-{
- int ret;
- size_t len = 5 /* /proc */ + 21 /* /int_as_str */ + 7 /* /ns/mnt */ + 1 /* \0 */;
- char path[len];
-
- ret = snprintf(path, len, "/proc/%d/ns/mnt", pid);
- if (ret < 0 || (size_t)ret >= len)
- return -1;
-
- return open(path, O_RDONLY | O_CLOEXEC);
-}
-
static void __attribute__((constructor)) collect_and_mount_subsystems(void)
{
FILE *f;
}
/* Preserve initial namespace. */
- init_ns = preserve_ns(getpid());
+ init_ns = preserve_mnt_ns(getpid());
if (init_ns < 0) {
lxcfs_error("%s\n", "Failed to preserve initial mount namespace.");
goto out;
}
free(hierarchies);
free(fd_hierarchies);
+
+ if (cgroup_mount_ns_fd >= 0)
+ close(cgroup_mount_ns_fd);
}