3 * Copyright © 2014 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2, as
8 * published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 * NOTES - make sure to run this as -s to avoid threading.
22 * TODO - can we enforce that here from the code?
24 #define FUSE_USE_VERSION 26
38 #include <nih/alloc.h>
39 #include <nih/string.h>
41 #include "cgmanager.h"
45 * a null-terminated, nih-allocated list of the mounted subsystems. We
46 * detect this at startup.
50 #define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
53 * Given a open file * to /proc/pid/{u,g}id_map, and an id
54 * valid in the caller's namespace, return the id mapped into
56 * Returns the mapped id, or -1 on error.
59 convert_id_to_ns(FILE *idfile
, unsigned int in_id
)
61 unsigned int nsuid
, // base id for a range in the idfile's namespace
62 hostuid
, // base id for a range in the caller's namespace
63 count
; // number of ids in this range
67 fseek(idfile
, 0L, SEEK_SET
);
68 while (fgets(line
, 400, idfile
)) {
69 ret
= sscanf(line
, "%u %u %u\n", &nsuid
, &hostuid
, &count
);
72 if (hostuid
+ count
< hostuid
|| nsuid
+ count
< nsuid
) {
74 * uids wrapped around - unexpected as this is a procfile,
77 fprintf(stderr
, "pid wrapparound at entry %u %u %u in %s",
78 nsuid
, hostuid
, count
, line
);
81 if (hostuid
<= in_id
&& hostuid
+count
> in_id
) {
83 * now since hostuid <= in_id < hostuid+count, and
84 * hostuid+count and nsuid+count do not wrap around,
85 * we know that nsuid+(in_id-hostuid) which must be
86 * less that nsuid+(count) must not wrap around
88 return (in_id
- hostuid
) + nsuid
;
97 * for is_privileged_over,
98 * specify whether we require the calling uid to be root in his
101 #define NS_ROOT_REQD true
102 #define NS_ROOT_OPT false
104 static bool is_privileged_over(pid_t pid
, uid_t uid
, uid_t victim
, bool req_ns_root
)
106 nih_local
char *fpath
= NULL
;
110 if (victim
== -1 || uid
== -1)
114 * If the request is one not requiring root in the namespace,
115 * then having the same uid suffices. (i.e. uid 1000 has write
116 * access to files owned by uid 1000
118 if (!req_ns_root
&& uid
== victim
)
121 fpath
= NIH_MUST( nih_sprintf(NULL
, "/proc/%d/uid_map", pid
) );
122 FILE *f
= fopen(fpath
, "r");
126 /* if caller's not root in his namespace, reject */
127 nsuid
= convert_id_to_ns(f
, uid
);
132 * If victim is not mapped into caller's ns, reject.
133 * XXX I'm not sure this check is needed given that fuse
134 * will be sending requests where the vfs has converted
136 nsuid
= convert_id_to_ns(f
, victim
);
147 static bool perms_include(int fmode
, mode_t req_mode
)
151 switch (req_mode
& O_ACCMODE
) {
159 r
= S_IROTH
| S_IWOTH
;
164 return ((fmode
& r
) == r
);
168 * check whether a fuse context may access a cgroup dir or file
170 * If file is not null, it is a cgroup file to check under cg.
171 * If file is null, then we are checking perms on cg itself.
173 * For files we can check the mode of the list_keys result.
174 * For cgroups, we must make assumptions based on the files under the
175 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
178 static bool fc_may_access(struct fuse_context
*fc
, const char *contrl
, const char *cg
, const char *file
, mode_t mode
)
180 nih_local
struct cgm_keys
**list
= NULL
;
189 if (!cgm_list_keys(contrl
, cg
, &list
))
191 for (i
= 0; list
[i
]; i
++) {
192 if (strcmp(list
[i
]->name
, file
) == 0) {
193 struct cgm_keys
*k
= list
[i
];
194 if (is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_OPT
)) {
195 if (perms_include(k
->mode
>> 6, mode
))
198 if (fc
->gid
== k
->gid
) {
199 if (perms_include(k
->mode
>> 3, mode
))
202 return perms_include(k
->mode
, mode
);
210 * given /cgroup/freezer/a/b, return "freezer". this will be nih-allocated
211 * and needs to be nih_freed.
213 static char *pick_controller_from_path(struct fuse_context
*fc
, const char *path
)
218 if (strlen(path
) < 9)
221 ret
= nih_strdup(NULL
, p1
);
224 slash
= strstr(ret
, "/");
228 /* verify that it is a subsystem */
229 char **list
= LXCFS_DATA
? LXCFS_DATA
->subsystems
: NULL
;
235 for (i
= 0; list
[i
]; i
++) {
236 if (strcmp(list
[i
], ret
) == 0)
244 * Find the start of cgroup in /cgroup/controller/the/cgroup/path
245 * Note that the returned value may include files (keynames) etc
247 static const char *find_cgroup_in_path(const char *path
)
251 if (strlen(path
) < 9)
253 p1
= strstr(path
+8, "/");
259 static bool is_child_cgroup(const char *contr
, const char *dir
, const char *f
)
261 nih_local
char **list
= NULL
;
269 if (!cgm_list_children(contr
, dir
, &list
))
271 for (i
= 0; list
[i
]; i
++) {
272 if (strcmp(list
[i
], f
) == 0)
279 static struct cgm_keys
*get_cgroup_key(const char *contr
, const char *dir
, const char *f
)
281 nih_local
struct cgm_keys
**list
= NULL
;
289 if (!cgm_list_keys(contr
, dir
, &list
))
291 for (i
= 0; list
[i
]; i
++) {
292 if (strcmp(list
[i
]->name
, f
) == 0) {
293 k
= NIH_MUST( nih_alloc(NULL
, (sizeof(*k
))) );
294 k
->name
= NIH_MUST( nih_strdup(k
, list
[i
]->name
) );
295 k
->uid
= list
[i
]->uid
;
296 k
->gid
= list
[i
]->gid
;
297 k
->mode
= list
[i
]->mode
;
305 static void get_cgdir_and_path(const char *cg
, char **dir
, char **file
)
309 *dir
= NIH_MUST( nih_strdup(NULL
, cg
) );
310 *file
= strrchr(cg
, '/');
315 p
= strrchr(*dir
, '/');
319 static size_t get_file_size(const char *contrl
, const char *cg
, const char *f
)
321 nih_local
char *data
= NULL
;
323 if (!cgm_get_value(contrl
, cg
, f
, &data
))
330 * FUSE ops for /cgroup
333 static int cg_getattr(const char *path
, struct stat
*sb
)
336 struct fuse_context
*fc
= fuse_get_context();
337 nih_local
char * cgdir
= NULL
;
338 char *fpath
= NULL
, *path1
, *path2
;
339 nih_local
struct cgm_keys
*k
= NULL
;
341 nih_local
char *controller
= NULL
;
347 memset(sb
, 0, sizeof(struct stat
));
349 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
352 sb
->st_uid
= sb
->st_gid
= 0;
353 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
356 if (strcmp(path
, "/cgroup") == 0) {
357 sb
->st_mode
= S_IFDIR
| 00755;
362 controller
= pick_controller_from_path(fc
, path
);
365 cgroup
= find_cgroup_in_path(path
);
367 /* this is just /cgroup/controller, return it as a dir */
368 sb
->st_mode
= S_IFDIR
| 00755;
373 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
383 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
384 * Then check that caller's cgroup is under path if fpath is a child
385 * cgroup, or cgdir if fpath is a file */
387 if (is_child_cgroup(controller
, path1
, path2
)) {
388 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
))
391 // get uid, gid, from '/tasks' file and make up a mode
392 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
393 sb
->st_mode
= S_IFDIR
| 00755;
394 k
= get_cgroup_key(controller
, cgroup
, "tasks");
396 fprintf(stderr
, "Failed to find a tasks file for %s\n", cgroup
);
397 sb
->st_uid
= sb
->st_gid
= 0;
399 fprintf(stderr
, "found a tasks file for %s\n", cgroup
);
407 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
408 if (!fc_may_access(fc
, controller
, path1
, path2
, O_RDONLY
))
411 sb
->st_mode
= S_IFREG
| k
->mode
;
415 sb
->st_size
= get_file_size(controller
, path1
, path2
);
422 static int cg_opendir(const char *path
, struct fuse_file_info
*fi
)
427 static int cg_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
428 struct fuse_file_info
*fi
)
430 struct fuse_context
*fc
= fuse_get_context();
435 if (strcmp(path
, "/cgroup") == 0) {
436 // get list of controllers
437 char **list
= LXCFS_DATA
? LXCFS_DATA
->subsystems
: NULL
;
442 /* TODO - collect the list of controllers at fuse_init */
443 for (i
= 0; list
[i
]; i
++) {
444 if (filler(buf
, list
[i
], NULL
, 0) != 0) {
451 // return list of keys for the controller, and list of child cgroups
452 nih_local
struct cgm_keys
**list
= NULL
;
454 nih_local
char *controller
= NULL
;
457 controller
= pick_controller_from_path(fc
, path
);
461 cgroup
= find_cgroup_in_path(path
);
463 /* this is just /cgroup/controller, return its contents */
467 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
))
470 if (!cgm_list_keys(controller
, cgroup
, &list
))
472 for (i
= 0; list
[i
]; i
++) {
473 if (filler(buf
, list
[i
]->name
, NULL
, 0) != 0) {
478 // now get the list of child cgroups
479 nih_local
char **clist
;
481 if (!cgm_list_children(controller
, cgroup
, &clist
))
483 for (i
= 0; clist
[i
]; i
++) {
484 if (filler(buf
, clist
[i
], NULL
, 0) != 0) {
491 static int cg_releasedir(const char *path
, struct fuse_file_info
*fi
)
496 static int cg_open(const char *path
, struct fuse_file_info
*fi
)
498 nih_local
char *controller
= NULL
;
500 char *fpath
= NULL
, *path1
, *path2
;
501 nih_local
char * cgdir
= NULL
;
502 nih_local
struct cgm_keys
*k
= NULL
;
503 struct fuse_context
*fc
= fuse_get_context();
508 controller
= pick_controller_from_path(fc
, path
);
511 cgroup
= find_cgroup_in_path(path
);
515 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
524 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
525 if (!fc_may_access(fc
, controller
, path1
, path2
, fi
->flags
))
528 /* TODO - we want to cache this info for read/write */
535 static int cg_read(const char *path
, char *buf
, size_t size
, off_t offset
,
536 struct fuse_file_info
*fi
)
538 nih_local
char *controller
= NULL
;
540 char *fpath
= NULL
, *path1
, *path2
;
541 struct fuse_context
*fc
= fuse_get_context();
542 nih_local
char * cgdir
= NULL
;
543 nih_local
struct cgm_keys
*k
= NULL
;
551 controller
= pick_controller_from_path(fc
, path
);
554 cgroup
= find_cgroup_in_path(path
);
558 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
567 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
568 nih_local
char *data
= NULL
;
571 if (!fc_may_access(fc
, controller
, path1
, path2
, O_RDONLY
))
574 if (!cgm_get_value(controller
, path1
, path2
, &data
))
580 memcpy(buf
, data
, s
);
588 int cg_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
589 struct fuse_file_info
*fi
)
591 nih_local
char *controller
= NULL
;
593 char *fpath
= NULL
, *path1
, *path2
;
594 struct fuse_context
*fc
= fuse_get_context();
595 nih_local
char * cgdir
= NULL
;
596 nih_local
struct cgm_keys
*k
= NULL
;
598 fprintf(stderr
, "cg_write: starting\n");
606 controller
= pick_controller_from_path(fc
, path
);
609 cgroup
= find_cgroup_in_path(path
);
613 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
622 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
623 if (!fc_may_access(fc
, controller
, path1
, path2
, O_WRONLY
))
626 if (!cgm_set_value(controller
, path1
, path2
, buf
))
635 int cg_chown(const char *path
, uid_t uid
, gid_t gid
)
637 struct fuse_context
*fc
= fuse_get_context();
638 nih_local
char * cgdir
= NULL
;
639 char *fpath
= NULL
, *path1
, *path2
;
640 nih_local
struct cgm_keys
*k
= NULL
;
642 nih_local
char *controller
= NULL
;
648 if (strcmp(path
, "/cgroup") == 0)
651 controller
= pick_controller_from_path(fc
, path
);
654 cgroup
= find_cgroup_in_path(path
);
656 /* this is just /cgroup/controller */
659 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
669 if (is_child_cgroup(controller
, path1
, path2
)) {
670 // get uid, gid, from '/tasks' file and make up a mode
671 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
672 k
= get_cgroup_key(controller
, cgroup
, "tasks");
675 k
= get_cgroup_key(controller
, path1
, path2
);
681 * This being a fuse request, the uid and gid must be valid
682 * in the caller's namespace. So we can just check to make
683 * sure that the caller is root in his uid, and privileged
684 * over the file's current owner.
686 if (!is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_REQD
))
689 if (!cgm_chown_file(controller
, cgroup
, uid
, gid
))
694 int cg_mkdir(const char *path
, mode_t mode
)
696 struct fuse_context
*fc
= fuse_get_context();
697 nih_local
struct cgm_keys
**list
= NULL
;
698 char *fpath
= NULL
, *path1
;
699 nih_local
char * cgdir
= NULL
;
701 nih_local
char *controller
= NULL
;
707 controller
= pick_controller_from_path(fc
, path
);
711 cgroup
= find_cgroup_in_path(path
);
715 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
721 if (!fc_may_access(fc
, controller
, path1
, NULL
, O_RDWR
))
725 if (!cgm_create(controller
, cgroup
, fc
->uid
, fc
->gid
))
731 static int cg_rmdir(const char *path
)
733 struct fuse_context
*fc
= fuse_get_context();
734 nih_local
struct cgm_keys
**list
= NULL
;
736 nih_local
char * cgdir
= NULL
;
738 nih_local
char *controller
= NULL
;
744 controller
= pick_controller_from_path(fc
, path
);
748 cgroup
= find_cgroup_in_path(path
);
752 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
756 if (!fc_may_access(fc
, controller
, cgdir
, NULL
, O_WRONLY
))
759 if (!cgm_remove(controller
, cgroup
))
769 static int proc_getattr(const char *path
, struct stat
*sb
)
771 if (strcmp(path
, "/proc") != 0)
773 sb
->st_mode
= S_IFDIR
| 00755;
780 * these just delegate to the /proc and /cgroup ops as
784 static int lxcfs_getattr(const char *path
, struct stat
*sb
)
786 if (strcmp(path
, "/") == 0) {
787 sb
->st_mode
= S_IFDIR
| 00755;
791 if (strncmp(path
, "/cgroup", 7) == 0) {
792 return cg_getattr(path
, sb
);
794 if (strncmp(path
, "/proc", 7) == 0) {
795 return proc_getattr(path
, sb
);
800 static int lxcfs_opendir(const char *path
, struct fuse_file_info
*fi
)
802 if (strcmp(path
, "/") == 0)
805 if (strncmp(path
, "/cgroup", 7) == 0) {
806 return cg_opendir(path
, fi
);
811 static int lxcfs_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
812 struct fuse_file_info
*fi
)
814 if (strcmp(path
, "/") == 0) {
815 if (filler(buf
, "proc", NULL
, 0) != 0 ||
816 filler(buf
, "cgroup", NULL
, 0) != 0)
820 if (strncmp(path
, "/cgroup", 7) == 0) {
821 return cg_readdir(path
, buf
, filler
, offset
, fi
);
826 static int lxcfs_releasedir(const char *path
, struct fuse_file_info
*fi
)
828 if (strcmp(path
, "/") == 0)
830 if (strncmp(path
, "/cgroup", 7) == 0) {
831 return cg_releasedir(path
, fi
);
836 static int lxcfs_open(const char *path
, struct fuse_file_info
*fi
)
838 if (strncmp(path
, "/cgroup", 7) == 0) {
839 return cg_open(path
, fi
);
845 static int lxcfs_read(const char *path
, char *buf
, size_t size
, off_t offset
,
846 struct fuse_file_info
*fi
)
848 if (strncmp(path
, "/cgroup", 7) == 0) {
849 return cg_read(path
, buf
, size
, offset
, fi
);
855 int lxcfs_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
856 struct fuse_file_info
*fi
)
858 if (strncmp(path
, "/cgroup", 7) == 0) {
859 return cg_write(path
, buf
, size
, offset
, fi
);
865 static int lxcfs_flush(const char *path
, struct fuse_file_info
*fi
)
870 static int lxcfs_release(const char *path
, struct fuse_file_info
*fi
)
875 static int lxcfs_fsync(const char *path
, int datasync
, struct fuse_file_info
*fi
)
880 int lxcfs_mkdir(const char *path
, mode_t mode
)
882 if (strncmp(path
, "/cgroup", 7) == 0)
883 return cg_mkdir(path
, mode
);
888 int lxcfs_chown(const char *path
, uid_t uid
, gid_t gid
)
890 if (strncmp(path
, "/cgroup", 7) == 0)
891 return cg_chown(path
, uid
, gid
);
897 * cat first does a truncate before doing ops->write. This doesn't
898 * really make sense for cgroups. So just return 0 always but do
901 int lxcfs_truncate(const char *path
, off_t newsize
)
903 if (strncmp(path
, "/cgroup", 7) == 0)
908 int lxcfs_rmdir(const char *path
)
910 if (strncmp(path
, "/cgroup", 7) == 0)
911 return cg_rmdir(path
);
915 const struct fuse_operations lxcfs_ops
= {
916 .getattr
= lxcfs_getattr
,
920 .mkdir
= lxcfs_mkdir
,
922 .rmdir
= lxcfs_rmdir
,
927 .chown
= lxcfs_chown
,
928 .truncate
= lxcfs_truncate
,
933 .release
= lxcfs_release
,
934 .write
= lxcfs_write
,
937 .flush
= lxcfs_flush
,
938 .fsync
= lxcfs_fsync
,
945 .opendir
= lxcfs_opendir
,
946 .readdir
= lxcfs_readdir
,
947 .releasedir
= lxcfs_releasedir
,
958 static void usage(const char *me
)
960 fprintf(stderr
, "Usage:\n");
961 fprintf(stderr
, "\n");
962 fprintf(stderr
, "%s [FUSE and mount options] mountpoint\n", me
);
966 static bool is_help(char *w
)
968 if (strcmp(w
, "-h") == 0 ||
969 strcmp(w
, "--help") == 0 ||
970 strcmp(w
, "-help") == 0 ||
971 strcmp(w
, "help") == 0)
976 int main(int argc
, char *argv
[])
979 struct lxcfs_state
*d
;
981 if (argc
< 2 || is_help(argv
[1]))
984 d
= malloc(sizeof(*d
));
988 if (!cgm_escape_cgroup())
989 fprintf(stderr
, "WARNING: failed to escape to root cgroup\n");
991 if (!cgm_get_controllers(&d
->subsystems
))
994 ret
= fuse_main(argc
, argv
, &lxcfs_ops
, d
);