3 * Copyright © 2014 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2, as
8 * published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 * NOTES - make sure to run this as -s to avoid threading.
22 * TODO - can we enforce that here from the code?
24 #define FUSE_USE_VERSION 26
38 #include <nih/alloc.h>
39 #include <nih/string.h>
41 #include "cgmanager.h"
45 * a null-terminated, nih-allocated list of the mounted subsystems. We
46 * detect this at startup.
50 #define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
/*
 * Translate an id through an id map.
 *
 * idfile: an open FILE * for /proc/pid/{u,g}id_map.  The stream is
 *         repositioned to its start before it is read, so the same
 *         stream can be passed in repeatedly.
 * in_id:  an id valid in the caller's namespace (matched against the
 *         second column of each map line).
 *
 * Returns the id mapped into the idfile's namespace (computed from the
 * first column of the matching line), or -1 on error.  Because the return
 * type is unsigned, -1 reaches the caller as UINT_MAX.  Errors are: a NULL
 * idfile, no range containing in_id, or a range that wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	unsigned int nsuid,   // base id for a range in the idfile's namespace
		     hostuid, // base id for a range in the caller's namespace
		     count;   // number of ids in this range
	char line[400];

	if (!idfile)
		return -1;

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, sizeof(line), idfile)) {
		/* ignore anything that is not a full "ns host count" triple */
		if (sscanf(line, "%u %u %u", &nsuid, &hostuid, &count) != 3)
			continue;

		if (hostuid + count < hostuid || nsuid + count < nsuid) {
			/*
			 * ids wrapped around - unexpected as this is a procfile,
			 * so give up rather than trust the rest of the map.
			 */
			fprintf(stderr, "id wraparound at entry %u %u %u in %s",
				nsuid, hostuid, count, line);
			return -1;
		}

		if (hostuid <= in_id && hostuid + count > in_id) {
			/*
			 * now since hostuid <= in_id < hostuid+count, and
			 * hostuid+count and nsuid+count do not wrap around,
			 * we know that nsuid+(in_id-hostuid), which must be
			 * less than nsuid+count, cannot wrap around either
			 */
			return (in_id - hostuid) + nsuid;
		}
	}

	// no range contained in_id
	return -1;
}
96 static bool is_privileged_over(pid_t pid
, uid_t uid
, uid_t victim
)
98 nih_local
char *fpath
= NULL
;
105 /* check /proc/pid/uid_map */
106 fpath
= NIH_MUST( nih_sprintf(NULL
, "/proc/%d/uid_map", pid
) );
107 FILE *f
= fopen(fpath
, "r");
111 nsuid
= convert_id_to_ns(f
, uid
);
115 nsuid
= convert_id_to_ns(f
, victim
);
/*
 * Check whether a set of permission bits allows the requested access.
 *
 * fmode:    permission bits positioned so that the triplet to test sits
 *           in the "other" slot (callers in this file pass mode >> 6,
 *           mode >> 3 or the unshifted mode).
 * req_mode: open(2)-style flags; only the O_ACCMODE bits are examined.
 *
 * Returns true when every bit needed for the access mode is set in fmode,
 * and false otherwise, including when the access mode is not one of
 * O_RDONLY, O_WRONLY or O_RDWR.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t r;

	switch (req_mode & O_ACCMODE) {
	case O_RDONLY:
		r = S_IROTH;
		break;
	case O_WRONLY:
		r = S_IWOTH;
		break;
	case O_RDWR:
		r = S_IROTH | S_IWOTH;
		break;
	default:
		/* unknown access mode: refuse instead of testing an unset mask */
		return false;
	}

	return (((mode_t) fmode & r) == r);
}
147 * check whether a fuse context may access a cgroup dir or file
149 * If file is not null, it is a cgroup file to check under cg.
150 * If file is null, then we are checking perms on cg itself.
152 * For files we can check the mode of the list_keys result.
153 * For cgroups, we must make assumptions based on the files under the
154 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
157 static bool fc_may_access(struct fuse_context
*fc
, const char *contrl
, const char *cg
, const char *file
, mode_t mode
)
159 nih_local
struct cgm_keys
**list
= NULL
;
168 if (!cgm_list_keys(contrl
, cg
, &list
))
170 for (i
= 0; list
[i
]; i
++) {
171 if (strcmp(list
[i
]->name
, file
) == 0) {
172 struct cgm_keys
*k
= list
[i
];
173 if (is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
)) {
174 if (perms_include(k
->mode
>> 6, mode
))
177 if (fc
->gid
== k
->gid
) {
178 if (perms_include(k
->mode
>> 3, mode
))
181 return perms_include(k
->mode
, mode
);
189 * given /cgroup/freezer/a/b, return "freezer". this will be nih-allocated
190 * and needs to be nih_freed.
192 static char *pick_controller_from_path(struct fuse_context
*fc
, const char *path
)
197 if (strlen(path
) < 9)
200 ret
= nih_strdup(NULL
, p1
);
203 slash
= strstr(ret
, "/");
207 /* verify that it is a subsystem */
208 char **list
= LXCFS_DATA
? LXCFS_DATA
->subsystems
: NULL
;
214 for (i
= 0; list
[i
]; i
++) {
215 if (strcmp(list
[i
], ret
) == 0)
/*
 * Find the start of the cgroup in /cgroup/controller/the/cgroup/path.
 *
 * Returns a pointer to the byte after the '/' that ends the controller
 * name, or NULL when path is too short to hold a controller name or has
 * nothing after the controller.  Note that the returned value may include
 * files (keynames) etc.  The result points into path; nothing is
 * allocated.
 */
static const char *find_cgroup_in_path(const char *path)
{
	/* length of the leading "/cgroup/" that precedes the controller name */
	const size_t prefix_len = sizeof("/cgroup/") - 1;
	const char *p1;

	/* need at least one controller character after the prefix */
	if (strlen(path) <= prefix_len)
		return NULL;

	p1 = strchr(path + prefix_len, '/');
	if (!p1)
		return NULL;

	return p1 + 1;
}
238 static bool is_child_cgroup(const char *contr
, const char *dir
, const char *f
)
240 nih_local
char **list
= NULL
;
248 if (!cgm_list_children(contr
, dir
, &list
))
250 for (i
= 0; list
[i
]; i
++) {
251 if (strcmp(list
[i
], f
) == 0)
258 static struct cgm_keys
*get_cgroup_key(const char *contr
, const char *dir
, const char *f
)
260 nih_local
struct cgm_keys
**list
= NULL
;
268 if (!cgm_list_keys(contr
, dir
, &list
))
270 for (i
= 0; list
[i
]; i
++) {
271 if (strcmp(list
[i
]->name
, f
) == 0) {
272 k
= NIH_MUST( nih_alloc(NULL
, (sizeof(*k
))) );
273 k
->name
= NIH_MUST( nih_strdup(k
, list
[i
]->name
) );
274 k
->uid
= list
[i
]->uid
;
275 k
->gid
= list
[i
]->gid
;
276 k
->mode
= list
[i
]->mode
;
284 static void get_cgdir_and_path(const char *cg
, char **dir
, char **file
)
288 *dir
= NIH_MUST( nih_strdup(NULL
, cg
) );
289 *file
= strrchr(cg
, '/');
294 p
= strrchr(*dir
, '/');
298 static size_t get_file_size(const char *contrl
, const char *cg
, const char *f
)
300 nih_local
char *data
= NULL
;
302 if (!cgm_get_value(contrl
, cg
, f
, &data
))
309 * FUSE ops for /cgroup
312 static int cg_getattr(const char *path
, struct stat
*sb
)
315 struct fuse_context
*fc
= fuse_get_context();
316 nih_local
char * cgdir
= NULL
;
317 char *fpath
= NULL
, *path1
, *path2
;
318 nih_local
struct cgm_keys
*k
= NULL
;
320 nih_local
char *controller
= NULL
;
326 memset(sb
, 0, sizeof(struct stat
));
328 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
331 sb
->st_uid
= sb
->st_gid
= 0;
332 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
335 if (strcmp(path
, "/cgroup") == 0) {
336 sb
->st_mode
= S_IFDIR
| 00755;
341 controller
= pick_controller_from_path(fc
, path
);
344 cgroup
= find_cgroup_in_path(path
);
346 /* this is just /cgroup/controller, return it as a dir */
347 sb
->st_mode
= S_IFDIR
| 00755;
352 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
362 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
363 * Then check that caller's cgroup is under path if fpath is a child
364 * cgroup, or cgdir if fpath is a file */
366 if (is_child_cgroup(controller
, path1
, path2
)) {
367 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
))
370 // get uid, gid, from '/tasks' file and make up a mode
371 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
372 sb
->st_mode
= S_IFDIR
| 00755;
373 k
= get_cgroup_key(controller
, cgroup
, "tasks");
375 fprintf(stderr
, "Failed to find a tasks file for %s\n", cgroup
);
376 sb
->st_uid
= sb
->st_gid
= 0;
378 fprintf(stderr
, "found a tasks file for %s\n", cgroup
);
386 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
387 if (!fc_may_access(fc
, controller
, path1
, path2
, O_RDONLY
))
390 sb
->st_mode
= S_IFREG
| k
->mode
;
394 sb
->st_size
= get_file_size(controller
, path1
, path2
);
401 static int cg_opendir(const char *path
, struct fuse_file_info
*fi
)
406 static int cg_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
407 struct fuse_file_info
*fi
)
409 struct fuse_context
*fc
= fuse_get_context();
414 if (strcmp(path
, "/cgroup") == 0) {
415 // get list of controllers
416 char **list
= LXCFS_DATA
? LXCFS_DATA
->subsystems
: NULL
;
421 /* TODO - collect the list of controllers at fuse_init */
422 for (i
= 0; list
[i
]; i
++) {
423 if (filler(buf
, list
[i
], NULL
, 0) != 0) {
430 // return list of keys for the controller, and list of child cgroups
431 nih_local
struct cgm_keys
**list
= NULL
;
433 nih_local
char *controller
= NULL
;
436 controller
= pick_controller_from_path(fc
, path
);
440 cgroup
= find_cgroup_in_path(path
);
442 /* this is just /cgroup/controller, return its contents */
446 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
))
449 if (!cgm_list_keys(controller
, cgroup
, &list
))
451 for (i
= 0; list
[i
]; i
++) {
452 if (filler(buf
, list
[i
]->name
, NULL
, 0) != 0) {
457 // now get the list of child cgroups
458 nih_local
char **clist
;
460 if (!cgm_list_children(controller
, cgroup
, &clist
))
462 for (i
= 0; clist
[i
]; i
++) {
463 if (filler(buf
, clist
[i
], NULL
, 0) != 0) {
470 static int cg_releasedir(const char *path
, struct fuse_file_info
*fi
)
475 static int cg_open(const char *path
, struct fuse_file_info
*fi
)
477 nih_local
char *controller
= NULL
;
479 char *fpath
= NULL
, *path1
, *path2
;
480 nih_local
char * cgdir
= NULL
;
481 nih_local
struct cgm_keys
*k
= NULL
;
482 struct fuse_context
*fc
= fuse_get_context();
487 controller
= pick_controller_from_path(fc
, path
);
490 cgroup
= find_cgroup_in_path(path
);
494 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
503 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
504 if (!fc_may_access(fc
, controller
, path1
, path2
, fi
->flags
))
507 /* TODO - we want to cache this info for read/write */
514 static int cg_read(const char *path
, char *buf
, size_t size
, off_t offset
,
515 struct fuse_file_info
*fi
)
517 nih_local
char *controller
= NULL
;
519 char *fpath
= NULL
, *path1
, *path2
;
520 struct fuse_context
*fc
= fuse_get_context();
521 nih_local
char * cgdir
= NULL
;
522 nih_local
struct cgm_keys
*k
= NULL
;
530 controller
= pick_controller_from_path(fc
, path
);
533 cgroup
= find_cgroup_in_path(path
);
537 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
546 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
547 nih_local
char *data
= NULL
;
550 if (!fc_may_access(fc
, controller
, path1
, path2
, O_RDONLY
))
553 if (!cgm_get_value(controller
, path1
, path2
, &data
))
559 memcpy(buf
, data
, s
);
567 int cg_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
568 struct fuse_file_info
*fi
)
570 nih_local
char *controller
= NULL
;
572 char *fpath
= NULL
, *path1
, *path2
;
573 struct fuse_context
*fc
= fuse_get_context();
574 nih_local
char * cgdir
= NULL
;
575 nih_local
struct cgm_keys
*k
= NULL
;
577 fprintf(stderr
, "cg_write: starting\n");
585 controller
= pick_controller_from_path(fc
, path
);
588 cgroup
= find_cgroup_in_path(path
);
592 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
601 if ((k
= get_cgroup_key(controller
, path1
, path2
)) != NULL
) {
602 if (!fc_may_access(fc
, controller
, path1
, path2
, O_WRONLY
))
605 if (!cgm_set_value(controller
, path1
, path2
, buf
))
615 int cg_mkdir(const char *path
, mode_t mode
)
617 struct fuse_context
*fc
= fuse_get_context();
618 nih_local
struct cgm_keys
**list
= NULL
;
619 char *fpath
= NULL
, *path1
;
620 nih_local
char * cgdir
= NULL
;
622 nih_local
char *controller
= NULL
;
628 controller
= pick_controller_from_path(fc
, path
);
632 cgroup
= find_cgroup_in_path(path
);
636 get_cgdir_and_path(cgroup
, &cgdir
, &fpath
);
642 if (!fc_may_access(fc
, controller
, path1
, NULL
, O_RDWR
))
646 if (!cgm_create(controller
, cgroup
, fc
->uid
, fc
->gid
))
656 static int proc_getattr(const char *path
, struct stat
*sb
)
658 if (strcmp(path
, "/proc") != 0)
660 sb
->st_mode
= S_IFDIR
| 00755;
667 * these just delegate to the /proc and /cgroup ops as
671 static int lxcfs_getattr(const char *path
, struct stat
*sb
)
673 if (strcmp(path
, "/") == 0) {
674 sb
->st_mode
= S_IFDIR
| 00755;
678 if (strncmp(path
, "/cgroup", 7) == 0) {
679 return cg_getattr(path
, sb
);
681 if (strncmp(path
, "/proc", 7) == 0) {
682 return proc_getattr(path
, sb
);
687 static int lxcfs_opendir(const char *path
, struct fuse_file_info
*fi
)
689 if (strcmp(path
, "/") == 0)
692 if (strncmp(path
, "/cgroup", 7) == 0) {
693 return cg_opendir(path
, fi
);
698 static int lxcfs_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
699 struct fuse_file_info
*fi
)
701 if (strcmp(path
, "/") == 0) {
702 if (filler(buf
, "proc", NULL
, 0) != 0 ||
703 filler(buf
, "cgroup", NULL
, 0) != 0)
707 if (strncmp(path
, "/cgroup", 7) == 0) {
708 return cg_readdir(path
, buf
, filler
, offset
, fi
);
713 static int lxcfs_releasedir(const char *path
, struct fuse_file_info
*fi
)
715 if (strcmp(path
, "/") == 0)
717 if (strncmp(path
, "/cgroup", 7) == 0) {
718 return cg_releasedir(path
, fi
);
723 static int lxcfs_open(const char *path
, struct fuse_file_info
*fi
)
725 if (strncmp(path
, "/cgroup", 7) == 0) {
726 return cg_open(path
, fi
);
732 static int lxcfs_read(const char *path
, char *buf
, size_t size
, off_t offset
,
733 struct fuse_file_info
*fi
)
735 if (strncmp(path
, "/cgroup", 7) == 0) {
736 return cg_read(path
, buf
, size
, offset
, fi
);
742 int lxcfs_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
743 struct fuse_file_info
*fi
)
745 if (strncmp(path
, "/cgroup", 7) == 0) {
746 return cg_write(path
, buf
, size
, offset
, fi
);
752 static int lxcfs_flush(const char *path
, struct fuse_file_info
*fi
)
757 static int lxcfs_release(const char *path
, struct fuse_file_info
*fi
)
762 static int lxcfs_fsync(const char *path
, int datasync
, struct fuse_file_info
*fi
)
767 int lxcfs_mkdir(const char *path
, mode_t mode
)
769 if (strncmp(path
, "/cgroup", 7) == 0)
770 return cg_mkdir(path
, mode
);
776 * cat first does a truncate before doing ops->write. This doesn't
777 * really make sense for cgroups. So just return 0 always but do
780 int lxcfs_truncate(const char *path
, off_t newsize
)
782 if (strncmp(path
, "/cgroup", 7) == 0)
787 const struct fuse_operations lxcfs_ops
= {
788 .getattr
= lxcfs_getattr
,
792 .mkdir
= lxcfs_mkdir
,
800 .truncate
= lxcfs_truncate
,
805 .release
= lxcfs_release
,
806 .write
= lxcfs_write
,
809 .flush
= lxcfs_flush
,
810 .fsync
= lxcfs_fsync
,
817 .opendir
= lxcfs_opendir
,
818 .readdir
= lxcfs_readdir
,
819 .releasedir
= lxcfs_releasedir
,
830 static void usage(const char *me
)
832 fprintf(stderr
, "Usage:\n");
833 fprintf(stderr
, "\n");
834 fprintf(stderr
, "%s [FUSE and mount options] mountpoint\n", me
);
/*
 * Tell whether a command-line word asks for the usage text.
 *
 * w: the word to test; it is only read, never modified.
 *
 * Returns true for "-h", "--help", "-help" and "help", false for anything
 * else.
 */
static bool is_help(const char *w)
{
	static const char *const help_words[] = {
		"-h", "--help", "-help", "help",
	};
	size_t i;

	for (i = 0; i < sizeof(help_words) / sizeof(help_words[0]); i++) {
		if (strcmp(w, help_words[i]) == 0)
			return true;
	}
	return false;
}
848 int main(int argc
, char *argv
[])
851 struct lxcfs_state
*d
;
853 if (argc
< 2 || is_help(argv
[1]))
856 d
= malloc(sizeof(*d
));
860 if (!cgm_escape_cgroup())
861 fprintf(stderr
, "WARNING: failed to escape to root cgroup\n");
863 if (!cgm_get_controllers(&d
->subsystems
))
866 ret
= fuse_main(argc
, argv
, &lxcfs_ops
, d
);