3 * Copyright © 2014 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
6 * See COPYING file for details.
10 * NOTES - make sure to run this as -s to avoid threading.
11 * TODO - can we enforce that here from the code?
13 #define FUSE_USE_VERSION 26
27 #include <nih/alloc.h>
28 #include <nih/string.h>
30 #include "cgmanager.h"
34 * a null-terminated, nih-allocated list of the mounted subsystems. We
35 * detect this at startup.
39 #define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
42 * Given a open file * to /proc/pid/{u,g}id_map, and an id
43 * valid in the caller's namespace, return the id mapped into
45 * Returns the mapped id, or -1 on error.
48 convert_id_to_ns(FILE *idfile
, unsigned int in_id
)
50 unsigned int nsuid
, // base id for a range in the idfile's namespace
51 hostuid
, // base id for a range in the caller's namespace
52 count
; // number of ids in this range
56 fseek(idfile
, 0L, SEEK_SET
);
57 while (fgets(line
, 400, idfile
)) {
58 ret
= sscanf(line
, "%u %u %u\n", &nsuid
, &hostuid
, &count
);
61 if (hostuid
+ count
< hostuid
|| nsuid
+ count
< nsuid
) {
63 * uids wrapped around - unexpected as this is a procfile,
66 fprintf(stderr
, "pid wrapparound at entry %u %u %u in %s",
67 nsuid
, hostuid
, count
, line
);
70 if (hostuid
<= in_id
&& hostuid
+count
> in_id
) {
72 * now since hostuid <= in_id < hostuid+count, and
73 * hostuid+count and nsuid+count do not wrap around,
74 * we know that nsuid+(in_id-hostuid) which must be
75 * less that nsuid+(count) must not wrap around
77 return (in_id
- hostuid
) + nsuid
;
86 * for is_privileged_over,
87 * specify whether we require the calling uid to be root in his
90 #define NS_ROOT_REQD true
91 #define NS_ROOT_OPT false
93 static bool is_privileged_over(pid_t pid
, uid_t uid
, uid_t victim
, bool req_ns_root
)
95 nih_local
char *fpath
= NULL
;
99 if (victim
== -1 || uid
== -1)
103 * If the request is one not requiring root in the namespace,
104 * then having the same uid suffices. (i.e. uid 1000 has write
105 * access to files owned by uid 1000
107 if (!req_ns_root
&& uid
== victim
)
110 fpath
= NIH_MUST( nih_sprintf(NULL
, "/proc/%d/uid_map", pid
) );
111 FILE *f
= fopen(fpath
, "r");
115 /* if caller's not root in his namespace, reject */
116 nsuid
= convert_id_to_ns(f
, uid
);
121 * If victim is not mapped into caller's ns, reject.
122 * XXX I'm not sure this check is needed given that fuse
123 * will be sending requests where the vfs has converted
125 nsuid
= convert_id_to_ns(f
, victim
);
136 static bool perms_include(int fmode
, mode_t req_mode
)
140 switch (req_mode
& O_ACCMODE
) {
148 r
= S_IROTH
| S_IWOTH
;
153 return ((fmode
& r
) == r
);
156 static char *get_next_cgroup_dir(const char *taskcg
, const char *querycg
)
160 if (strlen(taskcg
) <= strlen(querycg
)) {
161 fprintf(stderr
, "%s: I was fed bad input\n", __func__
);
165 if (strcmp(querycg
, "/") == 0)
166 start
= NIH_MUST( nih_strdup(NULL
, taskcg
+ 1) );
168 start
= NIH_MUST( nih_strdup(NULL
, taskcg
+ strlen(querycg
) + 1) );
169 end
= strchr(start
, '/');
176 * check whether a fuse context may access a cgroup dir or file
178 * If file is not null, it is a cgroup file to check under cg.
179 * If file is null, then we are checking perms on cg itself.
181 * For files we can check the mode of the list_keys result.
182 * For cgroups, we must make assumptions based on the files under the
183 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
186 static bool fc_may_access(struct fuse_context
*fc
, const char *contrl
, const char *cg
, const char *file
, mode_t mode
)
188 nih_local
struct cgm_keys
**list
= NULL
;
197 if (!cgm_list_keys(contrl
, cg
, &list
))
199 for (i
= 0; list
[i
]; i
++) {
200 if (strcmp(list
[i
]->name
, file
) == 0) {
201 struct cgm_keys
*k
= list
[i
];
202 if (is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_OPT
)) {
203 if (perms_include(k
->mode
>> 6, mode
))
206 if (fc
->gid
== k
->gid
) {
207 if (perms_include(k
->mode
>> 3, mode
))
210 return perms_include(k
->mode
, mode
);
217 static void stripnewline(char *x
)
219 size_t l
= strlen(x
);
220 if (l
&& x
[l
-1] == '\n')
225 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
226 * If caller is in /a, he may act on /a/b, but not on /b.
227 * if the answer is false and nextcg is not NULL, then *nextcg will point
228 * to a nih_alloc'd string containing the next cgroup directory under cg
230 static bool caller_is_in_ancestor(pid_t pid
, const char *contrl
, const char *cg
, char **nextcg
)
232 nih_local
char *fnam
= NULL
;
238 fnam
= NIH_MUST( nih_sprintf(NULL
, "/proc/%d/cgroup", pid
) );
239 if (!(f
= fopen(fnam
, "r")))
242 while (getline(&line
, &len
, f
) != -1) {
243 char *c1
, *c2
, *linecmp
;
246 c1
= strchr(line
, ':');
250 c2
= strchr(c1
, ':');
254 if (strcmp(c1
, contrl
) != 0)
259 * callers pass in '/' for root cgroup, otherwise they pass
260 * in a cgroup without leading '/'
262 linecmp
= *cg
== '/' ? c2
: c2
+1;
263 if (strncmp(linecmp
, cg
, strlen(linecmp
)) != 0) {
265 *nextcg
= get_next_cgroup_dir(linecmp
, cg
);
279 * given /cgroup/freezer/a/b, return "freezer". this will be nih-allocated
280 * and needs to be nih_freed.
282 static char *pick_controller_from_path(struct fuse_context
*fc
, const char *path
)
287 if (strlen(path
) < 9)
290 ret
= nih_strdup(NULL
, p1
);
293 slash
= strstr(ret
, "/");
297 /* verify that it is a subsystem */
298 char **list
= LXCFS_DATA
? LXCFS_DATA
->subsystems
: NULL
;
304 for (i
= 0; list
[i
]; i
++) {
305 if (strcmp(list
[i
], ret
) == 0)
313 * Find the start of cgroup in /cgroup/controller/the/cgroup/path
314 * Note that the returned value may include files (keynames) etc
316 static const char *find_cgroup_in_path(const char *path
)
320 if (strlen(path
) < 9)
322 p1
= strstr(path
+8, "/");
328 static bool is_child_cgroup(const char *contr
, const char *dir
, const char *f
)
330 nih_local
char **list
= NULL
;
338 if (!cgm_list_children(contr
, dir
, &list
))
340 for (i
= 0; list
[i
]; i
++) {
341 if (strcmp(list
[i
], f
) == 0)
348 static struct cgm_keys
*get_cgroup_key(const char *contr
, const char *dir
, const char *f
)
350 nih_local
struct cgm_keys
**list
= NULL
;
358 if (!cgm_list_keys(contr
, dir
, &list
))
360 for (i
= 0; list
[i
]; i
++) {
361 if (strcmp(list
[i
]->name
, f
) == 0) {
362 k
= NIH_MUST( nih_alloc(NULL
, (sizeof(*k
))) );
363 k
->name
= NIH_MUST( nih_strdup(k
, list
[i
]->name
) );
364 k
->uid
= list
[i
]->uid
;
365 k
->gid
= list
[i
]->gid
;
366 k
->mode
= list
[i
]->mode
;
374 static void get_cgdir_and_path(const char *cg
, char **dir
, char **file
)
378 *dir
= NIH_MUST( nih_strdup(NULL
, cg
) );
379 *file
= strrchr(cg
, '/');
384 p
= strrchr(*dir
, '/');
388 static size_t get_file_size(const char *contrl
, const char *cg
, const char *f
)
390 nih_local
char *data
= NULL
;
392 if (!cgm_get_value(contrl
, cg
, f
, &data
))
399 * FUSE ops for /cgroup
402 static int cg_getattr(const char *path
, struct stat
*sb
)
405 struct fuse_context
*fc
= fuse_get_context();
406 nih_local
char * cgdir
= NULL
;
407 char *fpath
= NULL
, *path1
, *path2
;
408 nih_local
struct cgm_keys
*k
= NULL
;
410 nih_local
char *controller
= NULL
;
416 memset(sb
, 0, sizeof(struct stat
));
418 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
421 sb
->st_uid
= sb
->st_gid
= 0;
422 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
425 if (strcmp(path
, "/cgroup") == 0) {
426 sb
->st_mode
= S_IFDIR
| 00755;
431 controller
= pick_controller_from_path(fc
, path
);
434 cgroup
= find_cgroup_in_path(path
);
437 /* this is just /cgroup/controller, return it as a dir */
438 sb
->st_mode
= S_IFDIR
| 00755;
443 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
453 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
454 * Then check that caller's cgroup is under path if fpath is a child
455 * cgroup, or cgdir if fpath is a file */
457 if (is_child_cgroup(controller
, path1
, path2
)) {
458 if (!caller_is_in_ancestor(fc
->pid
, controller
, cgroup
, NULL
))
460 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
))
463 // get uid, gid, from '/tasks' file and make up a mode
464 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
465 sb
->st_mode
= S_IFDIR
| 00755;
466 k
= get_cgroup_key(controller
, cgroup
, "tasks");
468 sb
->st_uid
= sb
->st_gid
= 0;
477 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
478 if (!caller_is_in_ancestor(fc
->pid
, controller
, path1
, NULL
))
480 if (!fc_may_access(fc
, controller
, path1
, path2
, O_RDONLY
))
483 sb
->st_mode
= S_IFREG
| k
->mode
;
487 sb
->st_size
= get_file_size(controller
, path1
, path2
);
494 static int cg_opendir(const char *path
, struct fuse_file_info
*fi
)
499 static int cg_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
500 struct fuse_file_info
*fi
)
502 struct fuse_context
*fc
= fuse_get_context();
507 if (strcmp(path
, "/cgroup") == 0) {
508 // get list of controllers
509 char **list
= LXCFS_DATA
? LXCFS_DATA
->subsystems
: NULL
;
514 /* TODO - collect the list of controllers at fuse_init */
515 for (i
= 0; list
[i
]; i
++) {
516 if (filler(buf
, list
[i
], NULL
, 0) != 0) {
523 // return list of keys for the controller, and list of child cgroups
524 nih_local
struct cgm_keys
**list
= NULL
;
526 nih_local
char *controller
= NULL
;
528 nih_local
char *nextcg
= NULL
;
530 controller
= pick_controller_from_path(fc
, path
);
534 cgroup
= find_cgroup_in_path(path
);
536 /* this is just /cgroup/controller, return its contents */
540 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
))
543 if (!cgm_list_keys(controller
, cgroup
, &list
))
544 // not a valid cgroup
547 if (!caller_is_in_ancestor(fc
->pid
, controller
, cgroup
, &nextcg
)) {
550 ret
= filler(buf
, nextcg
, NULL
, 0);
557 for (i
= 0; list
[i
]; i
++) {
558 if (filler(buf
, list
[i
]->name
, NULL
, 0) != 0) {
563 // now get the list of child cgroups
564 nih_local
char **clist
;
566 if (!cgm_list_children(controller
, cgroup
, &clist
))
568 for (i
= 0; clist
[i
]; i
++) {
569 if (filler(buf
, clist
[i
], NULL
, 0) != 0) {
576 static int cg_releasedir(const char *path
, struct fuse_file_info
*fi
)
581 static int cg_open(const char *path
, struct fuse_file_info
*fi
)
583 nih_local
char *controller
= NULL
;
585 char *fpath
= NULL
, *path1
, *path2
;
586 nih_local
char * cgdir
= NULL
;
587 nih_local
struct cgm_keys
*k
= NULL
;
588 struct fuse_context
*fc
= fuse_get_context();
593 controller
= pick_controller_from_path(fc
, path
);
596 cgroup
= find_cgroup_in_path(path
);
600 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
609 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
610 if (!fc_may_access(fc
, controller
, path1
, path2
, fi
->flags
))
613 /* TODO - we want to cache this info for read/write */
620 static int cg_read(const char *path
, char *buf
, size_t size
, off_t offset
,
621 struct fuse_file_info
*fi
)
623 nih_local
char *controller
= NULL
;
625 char *fpath
= NULL
, *path1
, *path2
;
626 struct fuse_context
*fc
= fuse_get_context();
627 nih_local
char * cgdir
= NULL
;
628 nih_local
struct cgm_keys
*k
= NULL
;
636 controller
= pick_controller_from_path(fc
, path
);
639 cgroup
= find_cgroup_in_path(path
);
643 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
652 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
653 nih_local
char *data
= NULL
;
656 if (!fc_may_access(fc
, controller
, path1
, path2
, O_RDONLY
))
659 if (!cgm_get_value(controller
, path1
, path2
, &data
))
665 memcpy(buf
, data
, s
);
673 int cg_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
674 struct fuse_file_info
*fi
)
676 nih_local
char *controller
= NULL
;
678 char *fpath
= NULL
, *path1
, *path2
;
679 struct fuse_context
*fc
= fuse_get_context();
680 nih_local
char * cgdir
= NULL
;
681 nih_local
struct cgm_keys
*k
= NULL
;
689 controller
= pick_controller_from_path(fc
, path
);
692 cgroup
= find_cgroup_in_path(path
);
696 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
705 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
706 if (!fc_may_access(fc
, controller
, path1
, path2
, O_WRONLY
))
709 if (!cgm_set_value(controller
, path1
, path2
, buf
))
718 int cg_chown(const char *path
, uid_t uid
, gid_t gid
)
720 struct fuse_context
*fc
= fuse_get_context();
721 nih_local
char * cgdir
= NULL
;
722 char *fpath
= NULL
, *path1
, *path2
;
723 nih_local
struct cgm_keys
*k
= NULL
;
725 nih_local
char *controller
= NULL
;
731 if (strcmp(path
, "/cgroup") == 0)
734 controller
= pick_controller_from_path(fc
, path
);
737 cgroup
= find_cgroup_in_path(path
);
739 /* this is just /cgroup/controller */
742 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
752 if (is_child_cgroup(controller
, path1
, path2
)) {
753 // get uid, gid, from '/tasks' file and make up a mode
754 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
755 k
= get_cgroup_key(controller
, cgroup
, "tasks");
758 k
= get_cgroup_key(controller
, path1
, path2
);
764 * This being a fuse request, the uid and gid must be valid
765 * in the caller's namespace. So we can just check to make
766 * sure that the caller is root in his uid, and privileged
767 * over the file's current owner.
769 if (!is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_REQD
))
772 if (!cgm_chown_file(controller
, cgroup
, uid
, gid
))
777 int cg_chmod(const char *path
, mode_t mode
)
779 struct fuse_context
*fc
= fuse_get_context();
780 nih_local
char * cgdir
= NULL
;
781 char *fpath
= NULL
, *path1
, *path2
;
782 nih_local
struct cgm_keys
*k
= NULL
;
784 nih_local
char *controller
= NULL
;
789 if (strcmp(path
, "/cgroup") == 0)
792 controller
= pick_controller_from_path(fc
, path
);
795 cgroup
= find_cgroup_in_path(path
);
797 /* this is just /cgroup/controller */
800 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
810 if (is_child_cgroup(controller
, path1
, path2
)) {
811 // get uid, gid, from '/tasks' file and make up a mode
812 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
813 k
= get_cgroup_key(controller
, cgroup
, "tasks");
816 k
= get_cgroup_key(controller
, path1
, path2
);
822 * This being a fuse request, the uid and gid must be valid
823 * in the caller's namespace. So we can just check to make
824 * sure that the caller is root in his uid, and privileged
825 * over the file's current owner.
827 if (!is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_OPT
))
830 if (!cgm_chmod_file(controller
, cgroup
, mode
))
835 int cg_mkdir(const char *path
, mode_t mode
)
837 struct fuse_context
*fc
= fuse_get_context();
838 nih_local
struct cgm_keys
**list
= NULL
;
839 char *fpath
= NULL
, *path1
;
840 nih_local
char * cgdir
= NULL
;
842 nih_local
char *controller
= NULL
;
848 controller
= pick_controller_from_path(fc
, path
);
852 cgroup
= find_cgroup_in_path(path
);
856 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
862 if (!fc_may_access(fc
, controller
, path1
, NULL
, O_RDWR
))
866 if (!cgm_create(controller
, cgroup
, fc
->uid
, fc
->gid
))
872 static int cg_rmdir(const char *path
)
874 struct fuse_context
*fc
= fuse_get_context();
875 nih_local
struct cgm_keys
**list
= NULL
;
877 nih_local
char * cgdir
= NULL
;
879 nih_local
char *controller
= NULL
;
885 controller
= pick_controller_from_path(fc
, path
);
889 cgroup
= find_cgroup_in_path(path
);
893 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
897 if (!fc_may_access(fc
, controller
, cgdir
, NULL
, O_WRONLY
))
900 if (!cgm_remove(controller
, cgroup
))
910 static int proc_getattr(const char *path
, struct stat
*sb
)
912 if (strcmp(path
, "/proc") != 0)
914 sb
->st_mode
= S_IFDIR
| 00755;
921 * these just delegate to the /proc and /cgroup ops as
925 static int lxcfs_getattr(const char *path
, struct stat
*sb
)
927 if (strcmp(path
, "/") == 0) {
928 sb
->st_mode
= S_IFDIR
| 00755;
932 if (strncmp(path
, "/cgroup", 7) == 0) {
933 return cg_getattr(path
, sb
);
935 if (strncmp(path
, "/proc", 7) == 0) {
936 return proc_getattr(path
, sb
);
941 static int lxcfs_opendir(const char *path
, struct fuse_file_info
*fi
)
943 if (strcmp(path
, "/") == 0)
946 if (strncmp(path
, "/cgroup", 7) == 0) {
947 return cg_opendir(path
, fi
);
952 static int lxcfs_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
953 struct fuse_file_info
*fi
)
955 if (strcmp(path
, "/") == 0) {
956 if (filler(buf
, "proc", NULL
, 0) != 0 ||
957 filler(buf
, "cgroup", NULL
, 0) != 0)
961 if (strncmp(path
, "/cgroup", 7) == 0) {
962 return cg_readdir(path
, buf
, filler
, offset
, fi
);
967 static int lxcfs_releasedir(const char *path
, struct fuse_file_info
*fi
)
969 if (strcmp(path
, "/") == 0)
971 if (strncmp(path
, "/cgroup", 7) == 0) {
972 return cg_releasedir(path
, fi
);
977 static int lxcfs_open(const char *path
, struct fuse_file_info
*fi
)
979 if (strncmp(path
, "/cgroup", 7) == 0) {
980 return cg_open(path
, fi
);
986 static int lxcfs_read(const char *path
, char *buf
, size_t size
, off_t offset
,
987 struct fuse_file_info
*fi
)
989 if (strncmp(path
, "/cgroup", 7) == 0) {
990 return cg_read(path
, buf
, size
, offset
, fi
);
996 int lxcfs_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
997 struct fuse_file_info
*fi
)
999 if (strncmp(path
, "/cgroup", 7) == 0) {
1000 return cg_write(path
, buf
, size
, offset
, fi
);
1006 static int lxcfs_flush(const char *path
, struct fuse_file_info
*fi
)
1011 static int lxcfs_release(const char *path
, struct fuse_file_info
*fi
)
1016 static int lxcfs_fsync(const char *path
, int datasync
, struct fuse_file_info
*fi
)
1021 int lxcfs_mkdir(const char *path
, mode_t mode
)
1023 if (strncmp(path
, "/cgroup", 7) == 0)
1024 return cg_mkdir(path
, mode
);
1029 int lxcfs_chown(const char *path
, uid_t uid
, gid_t gid
)
1031 if (strncmp(path
, "/cgroup", 7) == 0)
1032 return cg_chown(path
, uid
, gid
);
1038 * cat first does a truncate before doing ops->write. This doesn't
1039 * really make sense for cgroups. So just return 0 always but do
1042 int lxcfs_truncate(const char *path
, off_t newsize
)
1044 if (strncmp(path
, "/cgroup", 7) == 0)
1049 int lxcfs_rmdir(const char *path
)
1051 if (strncmp(path
, "/cgroup", 7) == 0)
1052 return cg_rmdir(path
);
1056 int lxcfs_chmod(const char *path
, mode_t mode
)
1058 if (strncmp(path
, "/cgroup", 7) == 0)
1059 return cg_chmod(path
, mode
);
1063 const struct fuse_operations lxcfs_ops
= {
1064 .getattr
= lxcfs_getattr
,
1068 .mkdir
= lxcfs_mkdir
,
1070 .rmdir
= lxcfs_rmdir
,
1074 .chmod
= lxcfs_chmod
,
1075 .chown
= lxcfs_chown
,
1076 .truncate
= lxcfs_truncate
,
1081 .release
= lxcfs_release
,
1082 .write
= lxcfs_write
,
1085 .flush
= lxcfs_flush
,
1086 .fsync
= lxcfs_fsync
,
1091 .removexattr
= NULL
,
1093 .opendir
= lxcfs_opendir
,
1094 .readdir
= lxcfs_readdir
,
1095 .releasedir
= lxcfs_releasedir
,
1106 static void usage(const char *me
)
1108 fprintf(stderr
, "Usage:\n");
1109 fprintf(stderr
, "\n");
1110 fprintf(stderr
, "%s [FUSE and mount options] mountpoint\n", me
);
1114 static bool is_help(char *w
)
1116 if (strcmp(w
, "-h") == 0 ||
1117 strcmp(w
, "--help") == 0 ||
1118 strcmp(w
, "-help") == 0 ||
1119 strcmp(w
, "help") == 0)
1124 int main(int argc
, char *argv
[])
1127 struct lxcfs_state
*d
;
1129 if (argc
< 2 || is_help(argv
[1]))
1132 d
= malloc(sizeof(*d
));
1136 if (!cgm_escape_cgroup())
1137 fprintf(stderr
, "WARNING: failed to escape to root cgroup\n");
1139 if (!cgm_get_controllers(&d
->subsystems
))
1142 ret
= fuse_main(argc
, argv
, &lxcfs_ops
, d
);