]> git.proxmox.com Git - mirror_lxcfs.git/blame - lxcfs.c
getattr: fill in ownership of cgroup dirs.
[mirror_lxcfs.git] / lxcfs.c
CommitLineData
758ad80c
SH
1/* lxcfs
2 *
3 * Copyright © 2014 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20/*
21 * NOTES - make sure to run this as -s to avoid threading.
22 * TODO - can we enforce that here from the code?
23 */
24#define FUSE_USE_VERSION 26
25
2183082c 26#include <stdio.h>
758ad80c
SH
27#include <dirent.h>
28#include <fcntl.h>
29#include <fuse.h>
30#include <unistd.h>
31#include <errno.h>
32#include <stdbool.h>
33#include <time.h>
34#include <string.h>
35#include <stdlib.h>
36#include <libgen.h>
37
38#include <nih/alloc.h>
39#include <nih/string.h>
40
41#include "cgmanager.h"
42
/*
 * Per-mount state handed to fuse_main() by main() and read back through
 * LXCFS_DATA.
 */
struct lxcfs_state {
	/*
	 * NULL-terminated, nih-allocated array holding the names of the
	 * mounted subsystems.  It is detected once at startup.
	 */
	char **subsystems;
};

/* The lxcfs_state stored as private_data of the current FUSE context. */
#define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
51
053a659d
SH
/*
 * Translate an id that is valid in the caller's namespace into the
 * namespace described by an open /proc/pid/{u,g}id_map stream.
 *
 * idfile: open stream on the map file; it is rewound before being read.
 * in_id:  id as seen in the caller's namespace.
 *
 * Returns the id as seen inside the target namespace, or (unsigned int) -1
 * when no range in the file covers in_id or a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char buf[400];
	unsigned int ns_base;	/* first id of the range in the idfile's namespace */
	unsigned int host_base;	/* first id of the range in the caller's namespace */
	unsigned int len;	/* number of ids in the range */

	fseek(idfile, 0L, SEEK_SET);

	while (fgets(buf, sizeof(buf), idfile) != NULL) {
		/* lines that do not hold exactly three numbers are skipped */
		if (sscanf(buf, "%u %u %u\n", &ns_base, &host_base, &len) != 3)
			continue;

		/* a wrapping range is unexpected in a procfile, so just bail */
		if (host_base + len < host_base || ns_base + len < ns_base) {
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s",
				ns_base, host_base, len, buf);
			return -1;
		}

		/*
		 * in_id lies in [host_base, host_base + len) and neither range
		 * wraps, so ns_base + (in_id - host_base) cannot wrap either.
		 */
		if (in_id >= host_base && in_id < host_base + len)
			return ns_base + (in_id - host_base);
	}

	/* no range matched */
	return -1;
}
95
758ad80c
SH
96static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim)
97{
053a659d
SH
98 nih_local char *fpath = NULL;
99 bool answer = false;
100 uid_t nsuid;
101
758ad80c
SH
102 if (uid == victim)
103 return true;
104
105 /* check /proc/pid/uid_map */
053a659d
SH
106 fpath = NIH_MUST( nih_sprintf(NULL, "/proc/%d/uid_map", pid) );
107 FILE *f = fopen(fpath, "r");
108 if (!f)
109 return false;
110
111 nsuid = convert_id_to_ns(f, uid);
112 if (nsuid)
113 goto out;
114
115 nsuid = convert_id_to_ns(f, victim);
116 if (nsuid == -1)
117 goto out;
118
119 answer = true;
120
121out:
122 fclose(f);
123 return answer;
758ad80c
SH
124}
125
/*
 * Report whether every bit set in req_mode is also set in fmode.
 * Each call logs both values to stderr.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	/* %d expects an int, so convert the mode_t argument explicitly */
	fprintf(stderr, "perms_include: checking whether %d includes %d\n",
		fmode, (int) req_mode);
	return (fmode & req_mode) == req_mode;
}
132
133/*
134 * check whether a fuse context may access a cgroup dir or file
135 *
136 * If file is not null, it is a cgroup file to check under cg.
137 * If file is null, then we are checking perms on cg itself.
138 *
139 * For files we can check the mode of the list_keys result.
140 * For cgroups, we must make assumptions based on the files under the
141 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
142 * yet.
143 */
144static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
145{
146 nih_local struct cgm_keys **list = NULL;
147 int i;
148
149 if (!file)
150 file = "tasks";
151
152 if (*file == '/')
153 file++;
154
155 if (!cgm_list_keys(contrl, cg, &list))
156 return false;
157 for (i = 0; list[i]; i++) {
158 if (strcmp(list[i]->name, file) == 0) {
159 struct cgm_keys *k = list[i];
758ad80c
SH
160 if (is_privileged_over(fc->pid, fc->uid, k->uid)) {
161 if (perms_include(k->mode >> 6, mode))
162 return true;
163 }
164 if (fc->gid == k->gid) {
165 if (perms_include(k->mode >> 3, mode))
166 return true;
167 }
168 return perms_include(k->mode, mode);
169 }
170 }
171
172 return false;
173}
174
175/*
176 * given /cgroup/freezer/a/b, return "freezer". this will be nih-allocated
177 * and needs to be nih_freed.
178 */
179static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
180{
181 const char *p1;
182 char *ret, *slash;
183
184 if (strlen(path) < 9)
185 return NULL;
186 p1 = path+8;
187 ret = nih_strdup(NULL, p1);
188 if (!ret)
189 return ret;
190 slash = strstr(ret, "/");
191 if (slash)
192 *slash = '\0';
193
194 /* verify that it is a subsystem */
195 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
196 int i;
197 if (!list) {
198 nih_free(ret);
199 return NULL;
200 }
201 for (i = 0; list[i]; i++) {
202 if (strcmp(list[i], ret) == 0)
203 return ret;
204 }
205 nih_free(ret);
206 return NULL;
207}
208
/*
 * Find the start of the cgroup in /cgroup/controller/the/cgroup/path.
 *
 * Returns a pointer into path just past the '/' that ends the controller
 * component, or NULL when path is shorter than 9 characters or holds no
 * such '/'.  The returned text may include files (keynames) etc.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *sep;

	if (strlen(path) < 9)
		return NULL;

	/* skip the first 8 characters, then look for the next separator */
	sep = strchr(path + 8, '/');
	return sep ? sep + 1 : NULL;
}
224
225static bool is_child_cgroup(const char *contr, const char *dir, const char *f)
226{
227 nih_local char **list = NULL;
228 int i;
229
230 if (!f)
231 return false;
232 if (*f == '/')
233 f++;
234
235 if (!cgm_list_children(contr, dir, &list))
236 return false;
237 for (i = 0; list[i]; i++) {
238 if (strcmp(list[i], f) == 0)
239 return true;
240 }
241
242 return false;
243}
244
245static struct cgm_keys *get_cgroup_key(const char *contr, const char *dir, const char *f)
246{
247 nih_local struct cgm_keys **list = NULL;
248 struct cgm_keys *k;
249 int i;
250
251 if (!f)
252 return NULL;
253 if (*f == '/')
254 f++;
255 if (!cgm_list_keys(contr, dir, &list))
256 return NULL;
257 for (i = 0; list[i]; i++) {
258 if (strcmp(list[i]->name, f) == 0) {
259 k = NIH_MUST( nih_alloc(NULL, (sizeof(*k))) );
260 k->name = NIH_MUST( nih_strdup(k, list[i]->name) );
261 k->uid = list[i]->uid;
262 k->gid = list[i]->gid;
263 k->mode = list[i]->mode;
264 return k;
265 }
266 }
267
268 return NULL;
269}
270
/*
 * Split cg at its last '/'.
 *
 * *dir receives a nih-allocated copy of cg, cut off at the last '/' when
 * there is one.  *file is set to the last '/' inside cg itself (so it still
 * starts with the '/'), or to NULL when cg holds no '/'.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **file)
{
	char *last_slash = strrchr(cg, '/');

	*dir = NIH_MUST( nih_strdup(NULL, cg) );
	*file = last_slash;
	if (!last_slash)
		return;

	/* the copy has its last '/' at the same offset as cg */
	(*dir)[last_slash - cg] = '\0';
}
284
285/*
286 * gettattr fn for anything under /cgroup
287 */
288static int cg_getattr(const char *path, struct stat *sb)
289{
290 struct timespec now;
291 struct fuse_context *fc = fuse_get_context();
292 nih_local char * cgdir = NULL;
293 char *fpath = NULL, *path1, *path2;
294 nih_local struct cgm_keys *k = NULL;
295 const char *cgroup;
296 nih_local char *controller = NULL;
297
298
299 if (!fc)
300 return -EIO;
301
302 memset(sb, 0, sizeof(struct stat));
303
304 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
305 return -EINVAL;
306
307 sb->st_uid = sb->st_gid = 0;
308 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
309 sb->st_size = 0;
310
311 if (strcmp(path, "/cgroup") == 0) {
312 sb->st_mode = S_IFDIR | 00755;
313 sb->st_nlink = 2;
314 return 0;
315 }
316
317 controller = pick_controller_from_path(fc, path);
318 if (!controller)
319 return -EIO;
758ad80c
SH
320 cgroup = find_cgroup_in_path(path);
321 if (!cgroup) {
322 /* this is just /cgroup/controller, return it as a dir */
323 sb->st_mode = S_IFDIR | 00755;
324 sb->st_nlink = 2;
325 return 0;
326 }
327
758ad80c
SH
328 get_cgdir_and_path(cgroup, &cgdir, &fpath);
329
330 if (!fpath) {
331 path1 = "/";
332 path2 = cgdir;
333 } else {
334 path1 = cgdir;
335 path2 = fpath;
336 }
337
758ad80c
SH
338 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
339 * Then check that caller's cgroup is under path if fpath is a child
340 * cgroup, or cgdir if fpath is a file */
341
342 if (is_child_cgroup(controller, path1, path2)) {
343 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
344 return -EPERM;
345
053a659d
SH
346 // get uid, gid, from '/tasks' file and make up a mode
347 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
348 sb->st_mode = S_IFDIR | 00755;
349 k = get_cgroup_key(controller, cgroup, "tasks");
350 if (!k) {
351 fprintf(stderr, "Failed to find a tasks file for %s\n", cgroup);
352 sb->st_uid = sb->st_gid = 0;
353 } else {
354 fprintf(stderr, "found a tasks file for %s\n", cgroup);
355 sb->st_uid = k->uid;
356 sb->st_gid = k->gid;
357 }
758ad80c
SH
358 sb->st_nlink = 2;
359 return 0;
360 }
361
362 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
363 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY))
364 return -EPERM;
365
758ad80c 366 sb->st_mode = S_IFREG | k->mode;
053a659d 367 sb->st_nlink = 1;
758ad80c
SH
368 sb->st_uid = k->uid;
369 sb->st_gid = k->gid;
758ad80c
SH
370 return 0;
371 }
372
373 return -EINVAL;
374}
2183082c 375
/*
 * opendir fn for anything under /cgroup.  Nothing is prepared per open
 * directory, so every request succeeds.
 */
static int cg_opendir(const char *path, struct fuse_file_info *fi)
{
	(void) path;
	(void) fi;

	return 0;
}
380
381/*
382 * readdir function for anything under /cgroup
383 */
384static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
385 struct fuse_file_info *fi)
386{
387 struct fuse_context *fc = fuse_get_context();
388
389 if (!fc)
390 return -EIO;
391
392 if (strcmp(path, "/cgroup") == 0) {
393 // get list of controllers
394 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
395 int i;
396
397 if (!list)
398 return -EIO;
399 /* TODO - collect the list of controllers at fuse_init */
400 for (i = 0; list[i]; i++) {
401 if (filler(buf, list[i], NULL, 0) != 0) {
402 return -EIO;
403 }
404 }
405 return 0;
406 }
407
408 // return list of keys for the controller, and list of child cgroups
409 nih_local struct cgm_keys **list = NULL;
410 const char *cgroup;
411 nih_local char *controller = NULL;
412 int i;
413
414 controller = pick_controller_from_path(fc, path);
415 if (!controller)
416 return -EIO;
417
418 cgroup = find_cgroup_in_path(path);
419 if (!cgroup) {
420 /* this is just /cgroup/controller, return its contents */
421 cgroup = "/";
422 }
423
424 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
425 return -EPERM;
426
427 if (!cgm_list_keys(controller, cgroup, &list))
428 return -EINVAL;
429 for (i = 0; list[i]; i++) {
430 fprintf(stderr, "adding key %s\n", list[i]->name);
431 if (filler(buf, list[i]->name, NULL, 0) != 0) {
432 return -EIO;
433 }
434 }
435
436 // now get the list of child cgroups
437 nih_local char **clist;
438
439 if (!cgm_list_children(controller, cgroup, &clist))
440 return 0;
441 for (i = 0; clist[i]; i++) {
442 fprintf(stderr, "adding child %s\n", clist[i]);
443 if (filler(buf, clist[i], NULL, 0) != 0) {
444 return -EIO;
445 }
446 }
447 return 0;
448}
449
/*
 * releasedir fn for anything under /cgroup.  There is nothing to release,
 * so every request succeeds.
 */
static int cg_releasedir(const char *path, struct fuse_file_info *fi)
{
	(void) path;
	(void) fi;

	return 0;
}
454
455/*
456 * So far I'm not actually using cg_ops and proc_ops, but listing them
457 * here makes it clearer who is supporting what. Still I prefer to
458 * call the real functions and not cg_ops->getattr.
459 */
460const struct fuse_operations cg_ops = {
461 .getattr = cg_getattr,
462 .readlink = NULL,
463 .getdir = NULL,
464 .mknod = NULL,
465 .mkdir = NULL,
466 .unlink = NULL,
467 .rmdir = NULL,
468 .symlink = NULL,
469 .rename = NULL,
470 .link = NULL,
471 .chmod = NULL,
472 .chown = NULL,
473 .truncate = NULL,
474 .utime = NULL,
475 .open = NULL,
476 .read = NULL,
477 .write = NULL,
478 .statfs = NULL,
479 .flush = NULL,
480 .release = NULL,
481 .fsync = NULL,
482
483 .setxattr = NULL,
484 .getxattr = NULL,
485 .listxattr = NULL,
486 .removexattr = NULL,
487
488 .opendir = cg_opendir,
489 .readdir = cg_readdir,
490 .releasedir = cg_releasedir,
491
492 .fsyncdir = NULL,
493 .init = NULL,
494 .destroy = NULL,
495 .access = NULL,
496 .create = NULL,
497 .ftruncate = NULL,
498 .fgetattr = NULL,
499};
500
/*
 * getattr fn for /proc.  Only the directory "/proc" itself is known: it is
 * reported as a 0755 directory with two links.  Any other path yields
 * -EINVAL and leaves sb untouched.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	if (strcmp(path, "/proc") == 0) {
		sb->st_mode = S_IFDIR | 00755;
		sb->st_nlink = 2;
		return 0;
	}

	return -EINVAL;
}
509
510const struct fuse_operations proc_ops = {
511 .getattr = proc_getattr,
512 .readlink = NULL,
513 .getdir = NULL,
514 .mknod = NULL,
515 .mkdir = NULL,
516 .unlink = NULL,
517 .rmdir = NULL,
518 .symlink = NULL,
519 .rename = NULL,
520 .link = NULL,
521 .chmod = NULL,
522 .chown = NULL,
523 .truncate = NULL,
524 .utime = NULL,
525 .open = NULL,
526 .read = NULL,
527 .write = NULL,
528 .statfs = NULL,
529 .flush = NULL,
530 .release = NULL,
531 .fsync = NULL,
532
533 .setxattr = NULL,
534 .getxattr = NULL,
535 .listxattr = NULL,
536 .removexattr = NULL,
537
538 .opendir = NULL,
539 .readdir = NULL,
540 .releasedir = NULL,
541
542 .fsyncdir = NULL,
543 .init = NULL,
544 .destroy = NULL,
545 .access = NULL,
546 .create = NULL,
547 .ftruncate = NULL,
548 .fgetattr = NULL,
549};
550
551static int lxcfs_getattr(const char *path, struct stat *sb)
552{
553 if (strcmp(path, "/") == 0) {
554 sb->st_mode = S_IFDIR | 00755;
555 sb->st_nlink = 2;
556 return 0;
557 }
558 if (strncmp(path, "/cgroup", 7) == 0) {
559 return cg_getattr(path, sb);
560 }
561 if (strncmp(path, "/proc", 7) == 0) {
562 return proc_getattr(path, sb);
563 }
564 return -EINVAL;
565}
566
/*
 * Top-level opendir: "/" always opens, paths starting with "/cgroup" go
 * to cg_opendir(), everything else is refused with -EINVAL.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	bool is_root = strcmp(path, "/") == 0;

	if (is_root)
		return 0;

	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_opendir(path, fi);
}
577
578static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
579 struct fuse_file_info *fi)
580{
581 if (strcmp(path, "/") == 0) {
582 if (filler(buf, "proc", NULL, 0) != 0 ||
583 filler(buf, "cgroup", NULL, 0) != 0)
584 return -EINVAL;
585 return 0;
586 }
587 if (strncmp(path, "/cgroup", 7) == 0) {
588 return cg_readdir(path, buf, filler, offset, fi);
589 }
590 return -EINVAL;
591}
592
/*
 * Top-level releasedir: "/" needs no work, paths starting with "/cgroup"
 * go to cg_releasedir(), everything else is refused with -EINVAL.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	bool is_root = strcmp(path, "/") == 0;

	if (is_root)
		return 0;

	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_releasedir(path, fi);
}
602
603void *bb_init(struct fuse_conn_info *conn)
604{
605 return LXCFS_DATA;
606}
607
608const struct fuse_operations lxcfs_ops = {
609 .getattr = lxcfs_getattr,
610 .readlink = NULL,
611 .getdir = NULL,
612 .mknod = NULL,
613 .mkdir = NULL,
614 .unlink = NULL,
615 .rmdir = NULL,
616 .symlink = NULL,
617 .rename = NULL,
618 .link = NULL,
619 .chmod = NULL,
620 .chown = NULL,
621 .truncate = NULL,
622 .utime = NULL,
623 .open = NULL,
624 .read = NULL,
625 .write = NULL,
626 .statfs = NULL,
627 .flush = NULL,
628 .release = NULL,
629 .fsync = NULL,
630
631 .setxattr = NULL,
632 .getxattr = NULL,
633 .listxattr = NULL,
634 .removexattr = NULL,
635
636 .opendir = lxcfs_opendir,
637 .readdir = lxcfs_readdir,
638 .releasedir = lxcfs_releasedir,
639
640 .fsyncdir = NULL,
641 .init = NULL,
642 .destroy = NULL,
643 .access = NULL,
644 .create = NULL,
645 .ftruncate = NULL,
646 .fgetattr = NULL,
647};
648
/*
 * Print the command-line synopsis for program name me to stderr and
 * terminate the process with exit status 1.
 */
void usage(const char *me)
{
	fputs("Usage:\n", stderr);
	fputs("\n", stderr);
	fprintf(stderr, "%s [FUSE and mount options] mountpoint\n", me);
	exit(1);
}
656
/*
 * Report whether w is one of the spellings accepted as a request for
 * help: "-h", "--help", "-help" or "help".
 */
bool is_help(char *w)
{
	static const char *const help_words[] = { "-h", "--help", "-help", "help" };
	size_t idx;

	for (idx = 0; idx < sizeof(help_words) / sizeof(help_words[0]); idx++) {
		if (strcmp(w, help_words[idx]) == 0)
			return true;
	}

	return false;
}
666
667int main(int argc, char *argv[])
668{
669 int ret;
670 struct lxcfs_state *d;
671
672 if (argc < 2 || is_help(argv[1]))
673 usage(argv[0]);
674
675 d = malloc(sizeof(*d));
676 if (!d)
677 return -1;
678
679 if (!cgm_escape_cgroup())
680 fprintf(stderr, "WARNING: failed to escape to root cgroup\n");
681
682 if (!cgm_get_controllers(&d->subsystems))
683 return -1;
684
685 ret = fuse_main(argc, argv, &lxcfs_ops, d);
686
687 return ret;
2183082c 688}