X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=lxcfs.c;h=62cfd3509751b248df3ae432ad1719cdf2c4aab3;hb=3137a0a63db273005d62973a5d055a7623e98631;hp=c08142b7c183661bf1352784a7fd3220a199c0cf;hpb=f279943021067bfcdb0b9650a1abbb4f4003ab89;p=mirror_lxcfs.git diff --git a/lxcfs.c b/lxcfs.c index c08142b..62cfd35 100644 --- a/lxcfs.c +++ b/lxcfs.c @@ -1,919 +1,430 @@ /* lxcfs * - * Copyright © 2014 Canonical, Inc + * Copyright © 2014-2016 Canonical, Inc * Author: Serge Hallyn * * See COPYING file for details. */ -/* - * NOTES - make sure to run this as -s to avoid threading. - * TODO - can we enforce that here from the code? - */ #define FUSE_USE_VERSION 26 -#include +#include #include +#include +#include #include #include -#include -#include +#include +#include +#include #include -#include -#include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#include -#include +#include "bindings.h" +#include "config.h" // for VERSION -#include "cgmanager.h" +void *dlopen_handle; -struct lxcfs_state { - /* - * a null-terminated, nih-allocated list of the mounted subsystems. We - * detect this at startup. - */ - char **subsystems; -}; -#define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data) +/* Functions to keep track of number of threads using the library */ -/* - * Given a open file * to /proc/pid/{u,g}id_map, and an id - * valid in the caller's namespace, return the id mapped into - * pid's namespace. - * Returns the mapped id, or -1 on error. - */ -unsigned int -convert_id_to_ns(FILE *idfile, unsigned int in_id) +static int users_count; +static pthread_mutex_t user_count_mutex = PTHREAD_MUTEX_INITIALIZER; +static void lock_mutex(pthread_mutex_t *l) { - unsigned int nsuid, // base id for a range in the idfile's namespace - hostuid, // base id for a range in the caller's namespace - count; // number of ids in this range - char line[400]; int ret; - fseek(idfile, 0L, SEEK_SET); - while (fgets(line, 400, idfile)) { - ret = sscanf(line, "%u %u %u\n", &nsuid, &hostuid, &count); - if (ret != 3) - continue; - if (hostuid + count < hostuid || nsuid + count < nsuid) { - /* - * uids wrapped around - unexpected as this is a procfile, - * so just bail. - */ - fprintf(stderr, "pid wrapparound at entry %u %u %u in %s", - nsuid, hostuid, count, line); - return -1; - } - if (hostuid <= in_id && hostuid+count > in_id) { - /* - * now since hostuid <= in_id < hostuid+count, and - * hostuid+count and nsuid+count do not wrap around, - * we know that nsuid+(in_id-hostuid) which must be - * less that nsuid+(count) must not wrap around - */ - return (in_id - hostuid) + nsuid; - } + if ((ret = pthread_mutex_lock(l)) != 0) { + lxcfs_error("returned:%d %s\n", ret, strerror(ret)); + exit(1); } - - // no answer found - return -1; } -/* - * for is_privileged_over, - * specify whether we require the calling uid to be root in his - * namespace - */ -#define NS_ROOT_REQD true -#define NS_ROOT_OPT false - -static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root) +static void unlock_mutex(pthread_mutex_t *l) { - nih_local char *fpath = NULL; - bool answer = false; - uid_t nsuid; - - if (victim == -1 || uid == -1) - return false; - - /* - * If the request is one not requiring root in the namespace, - * then having the same uid suffices. (i.e. uid 1000 has write - * access to files owned by uid 1000 - */ - if (!req_ns_root && uid == victim) - return true; - - fpath = NIH_MUST( nih_sprintf(NULL, "/proc/%d/uid_map", pid) ); - FILE *f = fopen(fpath, "r"); - if (!f) - return false; - - /* if caller's not root in his namespace, reject */ - nsuid = convert_id_to_ns(f, uid); - if (nsuid) - goto out; - - /* - * If victim is not mapped into caller's ns, reject. - * XXX I'm not sure this check is needed given that fuse - * will be sending requests where the vfs has converted - */ - nsuid = convert_id_to_ns(f, victim); - if (nsuid == -1) - goto out; - - answer = true; + int ret; -out: - fclose(f); - return answer; + if ((ret = pthread_mutex_unlock(l)) != 0) { + lxcfs_error("returned:%d %s\n", ret, strerror(ret)); + exit(1); + } } -static bool perms_include(int fmode, mode_t req_mode) +static void users_lock(void) { - mode_t r; - - switch (req_mode & O_ACCMODE) { - case O_RDONLY: - r = S_IROTH; - break; - case O_WRONLY: - r = S_IWOTH; - break; - case O_RDWR: - r = S_IROTH | S_IWOTH; - break; - default: - return false; - } - return ((fmode & r) == r); + lock_mutex(&user_count_mutex); } -static char *get_next_cgroup_dir(const char *taskcg, const char *querycg) +static void users_unlock(void) { - char *start, *end; - - if (strlen(taskcg) <= strlen(querycg)) { - fprintf(stderr, "%s: I was fed bad input\n", __func__); - return NULL; - } - - if (strcmp(querycg, "/") == 0) - start = NIH_MUST( nih_strdup(NULL, taskcg + 1) ); - else - start = NIH_MUST( nih_strdup(NULL, taskcg + strlen(querycg) + 1) ); - end = strchr(start, '/'); - if (end) - *end = '\0'; - return start; + unlock_mutex(&user_count_mutex); } -/* - * check whether a fuse context may access a cgroup dir or file - * - * If file is not null, it is a cgroup file to check under cg. - * If file is null, then we are checking perms on cg itself. - * - * For files we can check the mode of the list_keys result. - * For cgroups, we must make assumptions based on the files under the - * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups - * yet. - */ -static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode) +static volatile sig_atomic_t need_reload; + +/* do_reload - reload the dynamic library. Done under + * lock and when we know the user_count was 0 */ +static void do_reload(void) { - nih_local struct cgm_keys **list = NULL; - int i; + char lxcfs_lib_path[PATH_MAX]; + if (dlopen_handle) { + lxcfs_debug("%s\n", "Closing liblxcfs.so handle."); + dlclose(dlopen_handle); + } - if (!file) - file = "tasks"; - - if (*file == '/') - file++; - - if (!cgm_list_keys(contrl, cg, &list)) - return false; - for (i = 0; list[i]; i++) { - if (strcmp(list[i]->name, file) == 0) { - struct cgm_keys *k = list[i]; - if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) { - if (perms_include(k->mode >> 6, mode)) - return true; - } - if (fc->gid == k->gid) { - if (perms_include(k->mode >> 3, mode)) - return true; - } - return perms_include(k->mode, mode); - } + /* First try loading using ld.so */ + dlopen_handle = dlopen("liblxcfs.so", RTLD_LAZY); + if (dlopen_handle) { + lxcfs_debug("%s\n", "Successfully called dlopen() on liblxcfs.so."); + goto good; } - return false; -} +#ifdef LIBDIR + /* LIBDIR: autoconf will setup this MACRO. Default value is $PREFIX/lib */ + snprintf(lxcfs_lib_path, PATH_MAX, "%s/lxcfs/liblxcfs.so", LIBDIR); +#else + snprintf(lxcfs_lib_path, PATH_MAX, "/usr/local/lib/lxcfs/liblxcfs.so"); +#endif + dlopen_handle = dlopen(lxcfs_lib_path, RTLD_LAZY); + if (!dlopen_handle) { + lxcfs_error("Failed to open liblxcfs.so: %s.\n", dlerror()); + _exit(1); + } -static void stripnewline(char *x) -{ - size_t l = strlen(x); - if (l && x[l-1] == '\n') - x[l-1] = '\0'; +good: + if (need_reload) + lxcfs_error("%s\n", "lxcfs: reloaded"); + need_reload = 0; } -/* - * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d. - * If caller is in /a, he may act on /a/b, but not on /b. - * if the answer is false and nextcg is not NULL, then *nextcg will point - * to a nih_alloc'd string containing the next cgroup directory under cg - */ -static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg) +static void up_users(void) { - nih_local char *fnam = NULL; - FILE *f; - bool answer = false; - char *line = NULL; - size_t len = 0; - - fnam = NIH_MUST( nih_sprintf(NULL, "/proc/%d/cgroup", pid) ); - if (!(f = fopen(fnam, "r"))) - return false; - - while (getline(&line, &len, f) != -1) { - char *c1, *c2, *linecmp; - if (!line[0]) - continue; - c1 = strchr(line, ':'); - if (!c1) - goto out; - c1++; - c2 = strchr(c1, ':'); - if (!c2) - goto out; - *c2 = '\0'; - if (strcmp(c1, contrl) != 0) - continue; - c2++; - stripnewline(c2); - /* - * callers pass in '/' for root cgroup, otherwise they pass - * in a cgroup without leading '/' - */ - linecmp = *cg == '/' ? c2 : c2+1; - if (strncmp(linecmp, cg, strlen(linecmp)) != 0) { - if (nextcg) - *nextcg = get_next_cgroup_dir(linecmp, cg); - goto out; - } - answer = true; - goto out; - } - -out: - fclose(f); - free(line); - return answer; + users_lock(); + if (users_count == 0 && need_reload) + do_reload(); + users_count++; + users_unlock(); } -/* - * given /cgroup/freezer/a/b, return "freezer". this will be nih-allocated - * and needs to be nih_freed. - */ -static char *pick_controller_from_path(struct fuse_context *fc, const char *path) +static void down_users(void) { - const char *p1; - char *ret, *slash; - - if (strlen(path) < 9) - return NULL; - p1 = path+8; - ret = nih_strdup(NULL, p1); - if (!ret) - return ret; - slash = strstr(ret, "/"); - if (slash) - *slash = '\0'; - - /* verify that it is a subsystem */ - char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL; - int i; - if (!list) { - nih_free(ret); - return NULL; - } - for (i = 0; list[i]; i++) { - if (strcmp(list[i], ret) == 0) - return ret; - } - nih_free(ret); - return NULL; + users_lock(); + users_count--; + users_unlock(); } -/* - * Find the start of cgroup in /cgroup/controller/the/cgroup/path - * Note that the returned value may include files (keynames) etc - */ -static const char *find_cgroup_in_path(const char *path) +static void reload_handler(int sig) { - const char *p1; - - if (strlen(path) < 9) - return NULL; - p1 = strstr(path+8, "/"); - if (!p1) - return NULL; - return p1+1; + need_reload = 1; } -static bool is_child_cgroup(const char *contr, const char *dir, const char *f) +/* Functions to run the library methods */ +static int do_cg_getattr(const char *path, struct stat *sb) { - nih_local char **list = NULL; - int i; - - if (!f) - return false; - if (*f == '/') - f++; - - if (!cgm_list_children(contr, dir, &list)) - return false; - for (i = 0; list[i]; i++) { - if (strcmp(list[i], f) == 0) - return true; + int (*cg_getattr)(const char *path, struct stat *sb); + char *error; + dlerror(); /* Clear any existing error */ + cg_getattr = (int (*)(const char *, struct stat *)) dlsym(dlopen_handle, "cg_getattr"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - return false; + return cg_getattr(path, sb); } -static struct cgm_keys *get_cgroup_key(const char *contr, const char *dir, const char *f) +static int do_proc_getattr(const char *path, struct stat *sb) { - nih_local struct cgm_keys **list = NULL; - struct cgm_keys *k; - int i; - - if (!f) - return NULL; - if (*f == '/') - f++; - if (!cgm_list_keys(contr, dir, &list)) - return NULL; - for (i = 0; list[i]; i++) { - if (strcmp(list[i]->name, f) == 0) { - k = NIH_MUST( nih_alloc(NULL, (sizeof(*k))) ); - k->name = NIH_MUST( nih_strdup(k, list[i]->name) ); - k->uid = list[i]->uid; - k->gid = list[i]->gid; - k->mode = list[i]->mode; - return k; - } + int (*proc_getattr)(const char *path, struct stat *sb); + char *error; + dlerror(); /* Clear any existing error */ + proc_getattr = (int (*)(const char *, struct stat *)) dlsym(dlopen_handle, "proc_getattr"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - return NULL; + return proc_getattr(path, sb); } -static void get_cgdir_and_path(const char *cg, char **dir, char **file) +static int do_cg_read(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) { - char *p; + int (*cg_read)(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi); + char *error; - *dir = NIH_MUST( nih_strdup(NULL, cg) ); - *file = strrchr(cg, '/'); - if (!*file) { - *file = NULL; - return; + dlerror(); /* Clear any existing error */ + cg_read = (int (*)(const char *, char *, size_t, off_t, struct fuse_file_info *)) dlsym(dlopen_handle, "cg_read"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - p = strrchr(*dir, '/'); - *p = '\0'; -} -static size_t get_file_size(const char *contrl, const char *cg, const char *f) -{ - nih_local char *data = NULL; - size_t s; - if (!cgm_get_value(contrl, cg, f, &data)) - return -EINVAL; - s = strlen(data); - return s; + return cg_read(path, buf, size, offset, fi); } -/* - * FUSE ops for /cgroup - */ - -static int cg_getattr(const char *path, struct stat *sb) +static int do_proc_read(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) { - struct timespec now; - struct fuse_context *fc = fuse_get_context(); - nih_local char * cgdir = NULL; - char *fpath = NULL, *path1, *path2; - nih_local struct cgm_keys *k = NULL; - const char *cgroup; - nih_local char *controller = NULL; - - - if (!fc) - return -EIO; - - memset(sb, 0, sizeof(struct stat)); - - if (clock_gettime(CLOCK_REALTIME, &now) < 0) - return -EINVAL; - - sb->st_uid = sb->st_gid = 0; - sb->st_atim = sb->st_mtim = sb->st_ctim = now; - sb->st_size = 0; - - if (strcmp(path, "/cgroup") == 0) { - sb->st_mode = S_IFDIR | 00755; - sb->st_nlink = 2; - return 0; - } - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; - cgroup = find_cgroup_in_path(path); - if (!cgroup) { -empty: - /* this is just /cgroup/controller, return it as a dir */ - sb->st_mode = S_IFDIR | 00755; - sb->st_nlink = 2; - return 0; - } - - get_cgdir_and_path(cgroup, &cgdir, &fpath); - - if (!fpath) { - path1 = "/"; - path2 = cgdir; - } else { - path1 = cgdir; - path2 = fpath; - } - - /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys. - * Then check that caller's cgroup is under path if fpath is a child - * cgroup, or cgdir if fpath is a file */ - - if (is_child_cgroup(controller, path1, path2)) { - if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) - goto empty; - if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) - return -EPERM; + int (*proc_read)(const char *path, char *buf, size_t size, off_t offset, + struct fuse_file_info *fi); + char *error; - // get uid, gid, from '/tasks' file and make up a mode - // That is a hack, until cgmanager gains a GetCgroupPerms fn. - sb->st_mode = S_IFDIR | 00755; - k = get_cgroup_key(controller, cgroup, "tasks"); - if (!k) { - sb->st_uid = sb->st_gid = 0; - } else { - sb->st_uid = k->uid; - sb->st_gid = k->gid; - } - sb->st_nlink = 2; - return 0; - } - - if ((k = get_cgroup_key(controller, path1, path2)) != NULL) { - if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) - return -ENOENT; - if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) - return -EPERM; - - sb->st_mode = S_IFREG | k->mode; - sb->st_nlink = 1; - sb->st_uid = k->uid; - sb->st_gid = k->gid; - sb->st_size = get_file_size(controller, path1, path2); - return 0; + dlerror(); /* Clear any existing error */ + proc_read = (int (*)(const char *, char *, size_t, off_t, struct fuse_file_info *)) dlsym(dlopen_handle, "proc_read"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - return -ENOENT; + return proc_read(path, buf, size, offset, fi); } -static int cg_opendir(const char *path, struct fuse_file_info *fi) +static int do_cg_write(const char *path, const char *buf, size_t size, off_t offset, + struct fuse_file_info *fi) { - return 0; + int (*cg_write)(const char *path, const char *buf, size_t size, off_t offset, + struct fuse_file_info *fi); + char *error; + dlerror(); /* Clear any existing error */ + cg_write = (int (*)(const char *, const char *, size_t, off_t, struct fuse_file_info *)) dlsym(dlopen_handle, "cg_write"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; + } + + return cg_write(path, buf, size, offset, fi); } -static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, - struct fuse_file_info *fi) +static int do_cg_mkdir(const char *path, mode_t mode) { - struct fuse_context *fc = fuse_get_context(); - - if (!fc) - return -EIO; - - if (strcmp(path, "/cgroup") == 0) { - // get list of controllers - char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL; - int i; - - if (!list) - return -EIO; - /* TODO - collect the list of controllers at fuse_init */ - for (i = 0; list[i]; i++) { - if (filler(buf, list[i], NULL, 0) != 0) { - return -EIO; - } - } - return 0; + int (*cg_mkdir)(const char *path, mode_t mode); + char *error; + dlerror(); /* Clear any existing error */ + cg_mkdir = (int (*)(const char *, mode_t)) dlsym(dlopen_handle, "cg_mkdir"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - // return list of keys for the controller, and list of child cgroups - nih_local struct cgm_keys **list = NULL; - const char *cgroup; - nih_local char *controller = NULL; - int i; - nih_local char *nextcg = NULL; - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; + return cg_mkdir(path, mode); +} - cgroup = find_cgroup_in_path(path); - if (!cgroup) { - /* this is just /cgroup/controller, return its contents */ - cgroup = "/"; +static int do_cg_chown(const char *path, uid_t uid, gid_t gid) +{ + int (*cg_chown)(const char *path, uid_t uid, gid_t gid); + char *error; + dlerror(); /* Clear any existing error */ + cg_chown = (int (*)(const char *, uid_t, gid_t)) dlsym(dlopen_handle, "cg_chown"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) - return -EPERM; - - if (!cgm_list_keys(controller, cgroup, &list)) - // not a valid cgroup - return -EINVAL; - - if (!caller_is_in_ancestor(fc->pid, controller, cgroup, &nextcg)) { - if (nextcg) { - int ret; - ret = filler(buf, nextcg, NULL, 0); - if (ret != 0) - return -EIO; - } - return 0; - } + return cg_chown(path, uid, gid); +} - for (i = 0; list[i]; i++) { - if (filler(buf, list[i]->name, NULL, 0) != 0) { - return -EIO; - } +static int do_cg_rmdir(const char *path) +{ + int (*cg_rmdir)(const char *path); + char *error; + dlerror(); /* Clear any existing error */ + cg_rmdir = (int (*)(const char *path)) dlsym(dlopen_handle, "cg_rmdir"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - // now get the list of child cgroups - nih_local char **clist; - - if (!cgm_list_children(controller, cgroup, &clist)) - return 0; - for (i = 0; clist[i]; i++) { - if (filler(buf, clist[i], NULL, 0) != 0) { - return -EIO; - } - } - return 0; + return cg_rmdir(path); } -static int cg_releasedir(const char *path, struct fuse_file_info *fi) +static int do_cg_chmod(const char *path, mode_t mode) { - return 0; + int (*cg_chmod)(const char *path, mode_t mode); + char *error; + dlerror(); /* Clear any existing error */ + cg_chmod = (int (*)(const char *, mode_t)) dlsym(dlopen_handle, "cg_chmod"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; + } + + return cg_chmod(path, mode); } -static int cg_open(const char *path, struct fuse_file_info *fi) +static int do_cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, + struct fuse_file_info *fi) { - nih_local char *controller = NULL; - const char *cgroup; - char *fpath = NULL, *path1, *path2; - nih_local char * cgdir = NULL; - nih_local struct cgm_keys *k = NULL; - struct fuse_context *fc = fuse_get_context(); - - if (!fc) - return -EIO; - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; - cgroup = find_cgroup_in_path(path); - if (!cgroup) - return -EINVAL; - - get_cgdir_and_path(cgroup, &cgdir, &fpath); - if (!fpath) { - path1 = "/"; - path2 = cgdir; - } else { - path1 = cgdir; - path2 = fpath; - } + int (*cg_readdir)(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, + struct fuse_file_info *fi); + char *error; - if ((k = get_cgroup_key(controller, path1, path2)) != NULL) { - if (!fc_may_access(fc, controller, path1, path2, fi->flags)) - return -EPERM; - - /* TODO - we want to cache this info for read/write */ - return 0; + dlerror(); /* Clear any existing error */ + cg_readdir = (int (*)(const char *, void *, fuse_fill_dir_t, off_t, struct fuse_file_info *)) dlsym(dlopen_handle, "cg_readdir"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - return -EINVAL; + return cg_readdir(path, buf, filler, offset, fi); } -static int cg_read(const char *path, char *buf, size_t size, off_t offset, +static int do_proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi) { - nih_local char *controller = NULL; - const char *cgroup; - char *fpath = NULL, *path1, *path2; - struct fuse_context *fc = fuse_get_context(); - nih_local char * cgdir = NULL; - nih_local struct cgm_keys *k = NULL; - - if (offset) - return -EIO; - - if (!fc) - return -EIO; - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; - cgroup = find_cgroup_in_path(path); - if (!cgroup) - return -EINVAL; - - get_cgdir_and_path(cgroup, &cgdir, &fpath); - if (!fpath) { - path1 = "/"; - path2 = cgdir; - } else { - path1 = cgdir; - path2 = fpath; - } - - if ((k = get_cgroup_key(controller, path1, path2)) != NULL) { - nih_local char *data = NULL; - int s; - - if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) - return -EPERM; - - if (!cgm_get_value(controller, path1, path2, &data)) - return -EINVAL; - - s = strlen(data); - if (s > size) - s = size; - memcpy(buf, data, s); + int (*proc_readdir)(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, + struct fuse_file_info *fi); + char *error; - return s; + dlerror(); /* Clear any existing error */ + proc_readdir = (int (*)(const char *, void *, fuse_fill_dir_t, off_t, struct fuse_file_info *)) dlsym(dlopen_handle, "proc_readdir"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - return -EINVAL; + return proc_readdir(path, buf, filler, offset, fi); } -int cg_write(const char *path, const char *buf, size_t size, off_t offset, - struct fuse_file_info *fi) +static int do_cg_open(const char *path, struct fuse_file_info *fi) { - nih_local char *controller = NULL; - const char *cgroup; - char *fpath = NULL, *path1, *path2; - struct fuse_context *fc = fuse_get_context(); - nih_local char * cgdir = NULL; - nih_local struct cgm_keys *k = NULL; - - if (offset) - return -EIO; - - if (!fc) - return -EIO; - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; - cgroup = find_cgroup_in_path(path); - if (!cgroup) - return -EINVAL; - - get_cgdir_and_path(cgroup, &cgdir, &fpath); - if (!fpath) { - path1 = "/"; - path2 = cgdir; - } else { - path1 = cgdir; - path2 = fpath; + int (*cg_open)(const char *path, struct fuse_file_info *fi); + char *error; + dlerror(); /* Clear any existing error */ + cg_open = (int (*)(const char *, struct fuse_file_info *)) dlsym(dlopen_handle, "cg_open"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - if ((k = get_cgroup_key(controller, path1, path2)) != NULL) { - if (!fc_may_access(fc, controller, path1, path2, O_WRONLY)) - return -EPERM; - - if (!cgm_set_value(controller, path1, path2, buf)) - return -EINVAL; + return cg_open(path, fi); +} - return size; +static int do_cg_access(const char *path, int mode) +{ + int (*cg_access)(const char *path, int mode); + char *error; + dlerror(); /* Clear any existing error */ + cg_access = (int (*)(const char *, int mode)) dlsym(dlopen_handle, "cg_access"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - return -EINVAL; + return cg_access(path, mode); } -int cg_chown(const char *path, uid_t uid, gid_t gid) +static int do_proc_open(const char *path, struct fuse_file_info *fi) { - struct fuse_context *fc = fuse_get_context(); - nih_local char * cgdir = NULL; - char *fpath = NULL, *path1, *path2; - nih_local struct cgm_keys *k = NULL; - const char *cgroup; - nih_local char *controller = NULL; - - - if (!fc) - return -EIO; - - if (strcmp(path, "/cgroup") == 0) - return -EINVAL; - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; - cgroup = find_cgroup_in_path(path); - if (!cgroup) - /* this is just /cgroup/controller */ - return -EINVAL; - - get_cgdir_and_path(cgroup, &cgdir, &fpath); - - if (!fpath) { - path1 = "/"; - path2 = cgdir; - } else { - path1 = cgdir; - path2 = fpath; + int (*proc_open)(const char *path, struct fuse_file_info *fi); + char *error; + dlerror(); /* Clear any existing error */ + proc_open = (int (*)(const char *path, struct fuse_file_info *fi)) dlsym(dlopen_handle, "proc_open"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - if (is_child_cgroup(controller, path1, path2)) { - // get uid, gid, from '/tasks' file and make up a mode - // That is a hack, until cgmanager gains a GetCgroupPerms fn. - k = get_cgroup_key(controller, cgroup, "tasks"); - - } else - k = get_cgroup_key(controller, path1, path2); - - if (!k) - return -EINVAL; - - /* - * This being a fuse request, the uid and gid must be valid - * in the caller's namespace. So we can just check to make - * sure that the caller is root in his uid, and privileged - * over the file's current owner. - */ - if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) - return -EPERM; - - if (!cgm_chown_file(controller, cgroup, uid, gid)) - return -EINVAL; - return 0; + return proc_open(path, fi); } -int cg_chmod(const char *path, mode_t mode) +static int do_proc_access(const char *path, int mode) { - struct fuse_context *fc = fuse_get_context(); - nih_local char * cgdir = NULL; - char *fpath = NULL, *path1, *path2; - nih_local struct cgm_keys *k = NULL; - const char *cgroup; - nih_local char *controller = NULL; - - if (!fc) - return -EIO; - - if (strcmp(path, "/cgroup") == 0) - return -EINVAL; - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; - cgroup = find_cgroup_in_path(path); - if (!cgroup) - /* this is just /cgroup/controller */ - return -EINVAL; - - get_cgdir_and_path(cgroup, &cgdir, &fpath); - - if (!fpath) { - path1 = "/"; - path2 = cgdir; - } else { - path1 = cgdir; - path2 = fpath; + int (*proc_access)(const char *path, int mode); + char *error; + dlerror(); /* Clear any existing error */ + proc_access = (int (*)(const char *, int mode)) dlsym(dlopen_handle, "proc_access"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; } - if (is_child_cgroup(controller, path1, path2)) { - // get uid, gid, from '/tasks' file and make up a mode - // That is a hack, until cgmanager gains a GetCgroupPerms fn. - k = get_cgroup_key(controller, cgroup, "tasks"); - - } else - k = get_cgroup_key(controller, path1, path2); - - if (!k) - return -EINVAL; - - /* - * This being a fuse request, the uid and gid must be valid - * in the caller's namespace. So we can just check to make - * sure that the caller is root in his uid, and privileged - * over the file's current owner. - */ - if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) - return -EPERM; - - if (!cgm_chmod_file(controller, cgroup, mode)) - return -EINVAL; - return 0; + return proc_access(path, mode); } -int cg_mkdir(const char *path, mode_t mode) +static int do_cg_release(const char *path, struct fuse_file_info *fi) { - struct fuse_context *fc = fuse_get_context(); - nih_local struct cgm_keys **list = NULL; - char *fpath = NULL, *path1; - nih_local char * cgdir = NULL; - const char *cgroup; - nih_local char *controller = NULL; - - if (!fc) - return -EIO; - - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; - - cgroup = find_cgroup_in_path(path); - if (!cgroup) - return -EIO; - - get_cgdir_and_path(cgroup, &cgdir, &fpath); - if (!fpath) - path1 = "/"; - else - path1 = cgdir; - - if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) - return -EPERM; - - - if (!cgm_create(controller, cgroup, fc->uid, fc->gid)) - return -EINVAL; + int (*cg_release)(const char *path, struct fuse_file_info *fi); + char *error; + dlerror(); /* Clear any existing error */ + cg_release = (int (*)(const char *path, struct fuse_file_info *)) dlsym(dlopen_handle, "cg_release"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; + } - return 0; + return cg_release(path, fi); } -static int cg_rmdir(const char *path) +static int do_proc_release(const char *path, struct fuse_file_info *fi) { - struct fuse_context *fc = fuse_get_context(); - nih_local struct cgm_keys **list = NULL; - char *fpath = NULL; - nih_local char * cgdir = NULL; - const char *cgroup; - nih_local char *controller = NULL; - - if (!fc) - return -EIO; - - - controller = pick_controller_from_path(fc, path); - if (!controller) - return -EIO; - - cgroup = find_cgroup_in_path(path); - if (!cgroup) - return -EIO; - - get_cgdir_and_path(cgroup, &cgdir, &fpath); - if (!fpath) - return -EINVAL; + int (*proc_release)(const char *path, struct fuse_file_info *fi); + char *error; + dlerror(); /* Clear any existing error */ + proc_release = (int (*)(const char *path, struct fuse_file_info *)) dlsym(dlopen_handle, "proc_release"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; + } - if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) - return -EPERM; + return proc_release(path, fi); +} - if (!cgm_remove(controller, cgroup)) - return -EINVAL; +static int do_cg_opendir(const char *path, struct fuse_file_info *fi) +{ + int (*cg_opendir)(const char *path, struct fuse_file_info *fi); + char *error; + dlerror(); /* Clear any existing error */ + cg_opendir = (int (*)(const char *path, struct fuse_file_info *fi)) dlsym(dlopen_handle, "cg_opendir"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; + } - return 0; + return cg_opendir(path, fi); } -/* - * FUSE ops for /proc - */ - -static int proc_getattr(const char *path, struct stat *sb) +static int do_cg_releasedir(const char *path, struct fuse_file_info *fi) { - if (strcmp(path, "/proc") != 0) - return -EINVAL; - sb->st_mode = S_IFDIR | 00755; - sb->st_nlink = 2; - return 0; + int (*cg_releasedir)(const char *path, struct fuse_file_info *fi); + char *error; + dlerror(); /* Clear any existing error */ + cg_releasedir = (int (*)(const char *path, struct fuse_file_info *)) dlsym(dlopen_handle, "cg_releasedir"); + error = dlerror(); + if (error != NULL) { + lxcfs_error("%s\n", error); + return -1; + } + + return cg_releasedir(path, fi); } /* @@ -924,70 +435,152 @@ static int proc_getattr(const char *path, struct stat *sb) static int lxcfs_getattr(const char *path, struct stat *sb) { + int ret; + struct timespec now; + if (strcmp(path, "/") == 0) { + if (clock_gettime(CLOCK_REALTIME, &now) < 0) + return -EINVAL; + sb->st_uid = sb->st_gid = 0; + sb->st_atim = sb->st_mtim = sb->st_ctim = now; + sb->st_size = 0; sb->st_mode = S_IFDIR | 00755; sb->st_nlink = 2; return 0; } + if (strncmp(path, "/cgroup", 7) == 0) { - return cg_getattr(path, sb); + up_users(); + ret = do_cg_getattr(path, sb); + down_users(); + return ret; } - if (strncmp(path, "/proc", 7) == 0) { - return proc_getattr(path, sb); + if (strncmp(path, "/proc", 5) == 0) { + up_users(); + ret = do_proc_getattr(path, sb); + down_users(); + return ret; } - return -EINVAL; + return -ENOENT; } static int lxcfs_opendir(const char *path, struct fuse_file_info *fi) { + int ret; if (strcmp(path, "/") == 0) return 0; if (strncmp(path, "/cgroup", 7) == 0) { - return cg_opendir(path, fi); + up_users(); + ret = do_cg_opendir(path, fi); + down_users(); + return ret; } - return -EINVAL; + if (strcmp(path, "/proc") == 0) + return 0; + return -ENOENT; } static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi) { + int ret; if (strcmp(path, "/") == 0) { - if (filler(buf, "proc", NULL, 0) != 0 || - filler(buf, "cgroup", NULL, 0) != 0) - return -EINVAL; + if (filler(buf, ".", NULL, 0) != 0 || + filler(buf, "..", NULL, 0) != 0 || + filler(buf, "proc", NULL, 0) != 0 || + filler(buf, "cgroup", NULL, 0) != 0) + return -ENOMEM; return 0; } if (strncmp(path, "/cgroup", 7) == 0) { - return cg_readdir(path, buf, filler, offset, fi); + up_users(); + ret = do_cg_readdir(path, buf, filler, offset, fi); + down_users(); + return ret; } - return -EINVAL; + if (strcmp(path, "/proc") == 0) { + up_users(); + ret = do_proc_readdir(path, buf, filler, offset, fi); + down_users(); + return ret; + } + return -ENOENT; +} + +static int lxcfs_access(const char *path, int mode) +{ + int ret; + + if (strcmp(path, "/") == 0 && (mode & W_OK) == 0) + return 0; + + if (strncmp(path, "/cgroup", 7) == 0) { + up_users(); + ret = do_cg_access(path, mode); + down_users(); + return ret; + } + if (strncmp(path, "/proc", 5) == 0) { + up_users(); + ret = do_proc_access(path, mode); + down_users(); + return ret; + } + + return -EACCES; } static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi) { + int ret; if (strcmp(path, "/") == 0) return 0; if (strncmp(path, "/cgroup", 7) == 0) { - return cg_releasedir(path, fi); + up_users(); + ret = do_cg_releasedir(path, fi); + down_users(); + return ret; } + if (strcmp(path, "/proc") == 0) + return 0; return -EINVAL; } static int lxcfs_open(const char *path, struct fuse_file_info *fi) { + int ret; if (strncmp(path, "/cgroup", 7) == 0) { - return cg_open(path, fi); + up_users(); + ret = do_cg_open(path, fi); + down_users(); + return ret; + } + if (strncmp(path, "/proc", 5) == 0) { + up_users(); + ret = do_proc_open(path, fi); + down_users(); + return ret; } - return -EINVAL; + return -EACCES; } static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi) { + int ret; if (strncmp(path, "/cgroup", 7) == 0) { - return cg_read(path, buf, size, offset, fi); + up_users(); + ret = do_cg_read(path, buf, size, offset, fi); + down_users(); + return ret; + } + if (strncmp(path, "/proc", 5) == 0) { + up_users(); + ret = do_proc_read(path, buf, size, offset, fi); + down_users(); + return ret; } return -EINVAL; @@ -996,8 +589,12 @@ static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset, int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi) { + int ret; if (strncmp(path, "/cgroup", 7) == 0) { - return cg_write(path, buf, size, offset, fi); + up_users(); + ret = do_cg_write(path, buf, size, offset, fi); + down_users(); + return ret; } return -EINVAL; @@ -1010,7 +607,21 @@ static int lxcfs_flush(const char *path, struct fuse_file_info *fi) static int lxcfs_release(const char *path, struct fuse_file_info *fi) { - return 0; + int ret; + if (strncmp(path, "/cgroup", 7) == 0) { + up_users(); + ret = do_cg_release(path, fi); + down_users(); + return ret; + } + if (strncmp(path, "/proc", 5) == 0) { + up_users(); + ret = do_proc_release(path, fi); + down_users(); + return ret; + } + + return -EINVAL; } static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi) @@ -1020,18 +631,31 @@ static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi int lxcfs_mkdir(const char *path, mode_t mode) { - if (strncmp(path, "/cgroup", 7) == 0) - return cg_mkdir(path, mode); + int ret; + if (strncmp(path, "/cgroup", 7) == 0) { + up_users(); + ret = do_cg_mkdir(path, mode); + down_users(); + return ret; + } - return -EINVAL; + return -EPERM; } int lxcfs_chown(const char *path, uid_t uid, gid_t gid) { - if (strncmp(path, "/cgroup", 7) == 0) - return cg_chown(path, uid, gid); + int ret; + if (strncmp(path, "/cgroup", 7) == 0) { + up_users(); + ret = do_cg_chown(path, uid, gid); + down_users(); + return ret; + } - return -EINVAL; + if (strncmp(path, "/proc", 5) == 0) + return -EPERM; + + return -ENOENT; } /* @@ -1043,21 +667,35 @@ int lxcfs_truncate(const char *path, off_t newsize) { if (strncmp(path, "/cgroup", 7) == 0) return 0; - return -EINVAL; + return -EPERM; } int lxcfs_rmdir(const char *path) { - if (strncmp(path, "/cgroup", 7) == 0) - return cg_rmdir(path); - return -EINVAL; + int ret; + if (strncmp(path, "/cgroup", 7) == 0) { + up_users(); + ret = do_cg_rmdir(path); + down_users(); + return ret; + } + return -EPERM; } int lxcfs_chmod(const char *path, mode_t mode) { - if (strncmp(path, "/cgroup", 7) == 0) - return cg_chmod(path, mode); - return -EINVAL; + int ret; + if (strncmp(path, "/cgroup", 7) == 0) { + up_users(); + ret = do_cg_chmod(path, mode); + down_users(); + return ret; + } + + if (strncmp(path, "/proc", 5) == 0) + return -EPERM; + + return -ENOENT; } const struct fuse_operations lxcfs_ops = { @@ -1097,17 +735,20 @@ const struct fuse_operations lxcfs_ops = { .fsyncdir = NULL, .init = NULL, .destroy = NULL, - .access = NULL, + .access = lxcfs_access, .create = NULL, .ftruncate = NULL, .fgetattr = NULL, }; -static void usage(const char *me) +static void usage() { fprintf(stderr, "Usage:\n"); fprintf(stderr, "\n"); - fprintf(stderr, "%s [FUSE and mount options] mountpoint\n", me); + fprintf(stderr, "lxcfs [-f|-d] [-p pidfile] mountpoint\n"); + fprintf(stderr, " -f running foreground by default; -d enable debug output \n"); + fprintf(stderr, " Default pidfile is %s/lxcfs.pid\n", RUNTIME_PATH); + fprintf(stderr, "lxcfs -h\n"); exit(1); } @@ -1121,25 +762,155 @@ static bool is_help(char *w) return false; } -int main(int argc, char *argv[]) +bool swallow_arg(int *argcp, char *argv[], char *which) { - int ret; - struct lxcfs_state *d; + int i; + + for (i = 1; argv[i]; i++) { + if (strcmp(argv[i], which) != 0) + continue; + for (; argv[i]; i++) { + argv[i] = argv[i+1]; + } + (*argcp)--; + return true; + } + return false; +} - if (argc < 2 || is_help(argv[1])) - usage(argv[0]); +bool swallow_option(int *argcp, char *argv[], char *opt, char **v) +{ + int i; - d = malloc(sizeof(*d)); - if (!d) + for (i = 1; argv[i]; i++) { + if (!argv[i+1]) + continue; + if (strcmp(argv[i], opt) != 0) + continue; + do { + *v = strdup(argv[i+1]); + } while (!*v); + for (; argv[i+1]; i++) { + argv[i] = argv[i+2]; + } + (*argcp) -= 2; + return true; + } + return false; +} + +static int set_pidfile(char *pidfile) +{ + int fd; + char buf[50]; + struct flock fl; + + fl.l_type = F_WRLCK; + fl.l_whence = SEEK_SET; + fl.l_start = 0; + fl.l_len = 0; + + fd = open(pidfile, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); + if (fd == -1) { + fprintf(stderr, "Could not open pidfile %s: %m\n", pidfile); return -1; + } + + if (fcntl(fd, F_SETLK, &fl) == -1) { + if (errno == EAGAIN || errno == EACCES) { + fprintf(stderr, "PID file '%s' is already locked.\n", pidfile); + close(fd); + return -1; + } + fprintf(stderr, "Warning; unable to lock PID file, proceeding.\n"); + } - if (!cgm_escape_cgroup()) - fprintf(stderr, "WARNING: failed to escape to root cgroup\n"); + if (ftruncate(fd, 0) == -1) { + fprintf(stderr, "Error truncating PID file '%s': %m", pidfile); + close(fd); + return -1; + } - if (!cgm_get_controllers(&d->subsystems)) + snprintf(buf, 50, "%ld\n", (long) getpid()); + if (write(fd, buf, strlen(buf)) != strlen(buf)) { + fprintf(stderr, "Error writing to PID file '%s': %m", pidfile); + close(fd); return -1; + } + + return fd; +} - ret = fuse_main(argc, argv, &lxcfs_ops, d); +int main(int argc, char *argv[]) +{ + int ret = EXIT_FAILURE; + int pidfd = -1; + char *pidfile = NULL, *v = NULL; + size_t pidfile_len; + bool debug = false; + /* + * what we pass to fuse_main is: + * argv[0] -s [-f|-d] -o allow_other,directio argv[1] NULL + */ + int nargs = 5, cnt = 0; + char *newargv[6]; + + /* accomodate older init scripts */ + swallow_arg(&argc, argv, "-s"); + swallow_arg(&argc, argv, "-f"); + debug = swallow_arg(&argc, argv, "-d"); + if (swallow_option(&argc, argv, "-o", &v)) { + if (strcmp(v, "allow_other") != 0) { + fprintf(stderr, "Warning: unexpected fuse option %s\n", v); + exit(EXIT_FAILURE); + } + free(v); + v = NULL; + } + if (swallow_option(&argc, argv, "-p", &v)) + pidfile = v; - return ret; + if (argc == 2 && strcmp(argv[1], "--version") == 0) { + fprintf(stderr, "%s\n", VERSION); + exit(EXIT_SUCCESS); + } + if (argc != 2 || is_help(argv[1])) + usage(); + + do_reload(); + if (signal(SIGUSR1, reload_handler) == SIG_ERR) { + fprintf(stderr, "Error setting USR1 signal handler: %m\n"); + goto out; + } + + newargv[cnt++] = argv[0]; + if (debug) { + newargv[cnt++] = "-d"; + } else { + newargv[cnt++] = "-f"; + } + newargv[cnt++] = "-o"; + newargv[cnt++] = "allow_other,direct_io,entry_timeout=0.5,attr_timeout=0.5"; + newargv[cnt++] = argv[1]; + newargv[cnt++] = NULL; + + if (!pidfile) { + pidfile_len = strlen(RUNTIME_PATH) + strlen("/lxcfs.pid") + 1; + pidfile = alloca(pidfile_len); + snprintf(pidfile, pidfile_len, "%s/lxcfs.pid", RUNTIME_PATH); + } + if ((pidfd = set_pidfile(pidfile)) < 0) + goto out; + + if (!fuse_main(nargs, newargv, &lxcfs_ops, NULL)) + ret = EXIT_SUCCESS; + +out: + if (dlopen_handle) + dlclose(dlopen_handle); + if (pidfile) + unlink(pidfile); + if (pidfd > 0) + close(pidfd); + exit(ret); }