]> git.proxmox.com Git - mirror_lxcfs.git/blob - lxcfs.c
implement read for meminfo
[mirror_lxcfs.git] / lxcfs.c
1 /* lxcfs
2 *
3 * Copyright © 2014 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
6 * See COPYING file for details.
7 */
8
9 /*
10 * NOTES - make sure to run this as -s to avoid threading.
11 * TODO - can we enforce that here from the code?
12 */
13 #define FUSE_USE_VERSION 26
14
15 #include <stdio.h>
16 #include <dirent.h>
17 #include <fcntl.h>
18 #include <fuse.h>
19 #include <unistd.h>
20 #include <errno.h>
21 #include <stdbool.h>
22 #include <time.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <libgen.h>
26
27 #include <nih/alloc.h>
28 #include <nih/string.h>
29
30 #include "cgmanager.h"
31
/* Filesystem-wide state; reached through the FUSE context's private_data. */
struct lxcfs_state {
	/*
	 * NULL-terminated, nih-allocated array holding the name of every
	 * mounted subsystem. It is detected once, at startup.
	 */
	char **subsystems;
};

/* The lxcfs_state attached to the FUSE context of the current request. */
#define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
40
/*
 * Translate @in_id, an id valid in the caller's namespace, into the
 * namespace described by @idfile, an open stream on a
 * /proc/pid/{u,g}id_map file.
 *
 * The stream is rewound before it is scanned. Lines that do not parse as
 * three unsigned numbers are skipped.
 *
 * Returns the mapped id, or -1 (converted to unsigned int) when no range
 * contains @in_id or when a range in the map wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char entry[400];

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(entry, sizeof(entry), idfile) != NULL) {
		unsigned int ns_base;	/* first id of the range inside the namespace */
		unsigned int host_base;	/* first id of the range as the caller sees it */
		unsigned int range;	/* number of ids covered by the range */

		if (sscanf(entry, "%u %u %u\n", &ns_base, &host_base, &range) != 3)
			continue;

		/* A procfile is not expected to hold a wrapping range: bail. */
		if (host_base + range < host_base || ns_base + range < ns_base) {
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s",
				ns_base, host_base, range, entry);
			return -1;
		}

		/* Not our range? Try the next entry. */
		if (in_id < host_base || in_id >= host_base + range)
			continue;

		/*
		 * in_id - host_base is smaller than range, and ns_base + range
		 * was checked not to wrap, so this sum cannot wrap either.
		 */
		return ns_base + (in_id - host_base);
	}

	/* no range matched */
	return -1;
}
84
85 /*
86 * for is_privileged_over,
87 * specify whether we require the calling uid to be root in his
88 * namespace
89 */
90 #define NS_ROOT_REQD true
91 #define NS_ROOT_OPT false
92
93 static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
94 {
95 nih_local char *fpath = NULL;
96 bool answer = false;
97 uid_t nsuid;
98
99 if (victim == -1 || uid == -1)
100 return false;
101
102 /*
103 * If the request is one not requiring root in the namespace,
104 * then having the same uid suffices. (i.e. uid 1000 has write
105 * access to files owned by uid 1000
106 */
107 if (!req_ns_root && uid == victim)
108 return true;
109
110 fpath = NIH_MUST( nih_sprintf(NULL, "/proc/%d/uid_map", pid) );
111 FILE *f = fopen(fpath, "r");
112 if (!f)
113 return false;
114
115 /* if caller's not root in his namespace, reject */
116 nsuid = convert_id_to_ns(f, uid);
117 if (nsuid)
118 goto out;
119
120 /*
121 * If victim is not mapped into caller's ns, reject.
122 * XXX I'm not sure this check is needed given that fuse
123 * will be sending requests where the vfs has converted
124 */
125 nsuid = convert_id_to_ns(f, victim);
126 if (nsuid == -1)
127 goto out;
128
129 answer = true;
130
131 out:
132 fclose(f);
133 return answer;
134 }
135
/*
 * Check whether the "other" permission bits of @fmode grant the access
 * asked for by the O_RDONLY / O_WRONLY / O_RDWR part of @req_mode.
 *
 * Returns false when the bits are missing or the access mode is not one
 * of those three.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
155
/*
 * Return the path component of @taskcg which directly follows the prefix
 * @querycg, e.g. "b" for a task cgroup "a/b/c" queried at "a".
 *
 * The result is a nih-allocated string. NULL is returned (and a message
 * printed to stderr) when @taskcg is not longer than @querycg.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t skip;
	char *next, *slash;

	if (strlen(querycg) >= strlen(taskcg)) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/*
	 * Step over the query prefix plus the '/' that follows it; for the
	 * root cgroup the prefix itself is that single '/'.
	 */
	skip = strcmp(querycg, "/") == 0 ? 1 : strlen(querycg) + 1;
	next = NIH_MUST( nih_strdup(NULL, taskcg + skip) );

	/* keep only the first remaining component */
	slash = strchr(next, '/');
	if (slash != NULL)
		*slash = '\0';
	return next;
}
174
175 /*
176 * check whether a fuse context may access a cgroup dir or file
177 *
178 * If file is not null, it is a cgroup file to check under cg.
179 * If file is null, then we are checking perms on cg itself.
180 *
181 * For files we can check the mode of the list_keys result.
182 * For cgroups, we must make assumptions based on the files under the
183 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
184 * yet.
185 */
186 static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
187 {
188 nih_local struct cgm_keys **list = NULL;
189 int i;
190
191 if (!file)
192 file = "tasks";
193
194 if (*file == '/')
195 file++;
196
197 if (!cgm_list_keys(contrl, cg, &list))
198 return false;
199 for (i = 0; list[i]; i++) {
200 if (strcmp(list[i]->name, file) == 0) {
201 struct cgm_keys *k = list[i];
202 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
203 if (perms_include(k->mode >> 6, mode))
204 return true;
205 }
206 if (fc->gid == k->gid) {
207 if (perms_include(k->mode >> 3, mode))
208 return true;
209 }
210 return perms_include(k->mode, mode);
211 }
212 }
213
214 return false;
215 }
216
/* Remove one trailing '\n' from @x in place, if @x ends with one. */
static void stripnewline(char *x)
{
	char *last;

	if (*x == '\0')
		return;

	last = x + strlen(x) - 1;
	if (*last == '\n')
		*last = '\0';
}
223
224 /*
225 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
226 * If caller is in /a, he may act on /a/b, but not on /b.
227 * if the answer is false and nextcg is not NULL, then *nextcg will point
228 * to a nih_alloc'd string containing the next cgroup directory under cg
229 */
230 static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
231 {
232 nih_local char *fnam = NULL;
233 FILE *f;
234 bool answer = false;
235 char *line = NULL;
236 size_t len = 0;
237
238 fnam = NIH_MUST( nih_sprintf(NULL, "/proc/%d/cgroup", pid) );
239 if (!(f = fopen(fnam, "r")))
240 return false;
241
242 while (getline(&line, &len, f) != -1) {
243 char *c1, *c2, *linecmp;
244 if (!line[0])
245 continue;
246 c1 = strchr(line, ':');
247 if (!c1)
248 goto out;
249 c1++;
250 c2 = strchr(c1, ':');
251 if (!c2)
252 goto out;
253 *c2 = '\0';
254 if (strcmp(c1, contrl) != 0)
255 continue;
256 c2++;
257 stripnewline(c2);
258 /*
259 * callers pass in '/' for root cgroup, otherwise they pass
260 * in a cgroup without leading '/'
261 */
262 linecmp = *cg == '/' ? c2 : c2+1;
263 if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
264 if (nextcg)
265 *nextcg = get_next_cgroup_dir(linecmp, cg);
266 goto out;
267 }
268 answer = true;
269 goto out;
270 }
271
272 out:
273 fclose(f);
274 free(line);
275 return answer;
276 }
277
278 /*
279 * given /cgroup/freezer/a/b, return "freezer". this will be nih-allocated
280 * and needs to be nih_freed.
281 */
282 static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
283 {
284 const char *p1;
285 char *ret, *slash;
286
287 if (strlen(path) < 9)
288 return NULL;
289 p1 = path+8;
290 ret = nih_strdup(NULL, p1);
291 if (!ret)
292 return ret;
293 slash = strstr(ret, "/");
294 if (slash)
295 *slash = '\0';
296
297 /* verify that it is a subsystem */
298 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
299 int i;
300 if (!list) {
301 nih_free(ret);
302 return NULL;
303 }
304 for (i = 0; list[i]; i++) {
305 if (strcmp(list[i], ret) == 0)
306 return ret;
307 }
308 nih_free(ret);
309 return NULL;
310 }
311
/*
 * Locate the cgroup part of /cgroup/controller/the/cgroup/path.
 *
 * Returns a pointer into @path just past the '/' which ends the controller
 * name, or NULL when @path has no such part. The returned string may
 * include file (key) names as well.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *slash;

	if (strlen(path) < 9)
		return NULL;

	/* skip "/cgroup/" and look for the end of the controller name */
	slash = strchr(path + 8, '/');
	return slash ? slash + 1 : NULL;
}
327
328 static bool is_child_cgroup(const char *contr, const char *dir, const char *f)
329 {
330 nih_local char **list = NULL;
331 int i;
332
333 if (!f)
334 return false;
335 if (*f == '/')
336 f++;
337
338 if (!cgm_list_children(contr, dir, &list))
339 return false;
340 for (i = 0; list[i]; i++) {
341 if (strcmp(list[i], f) == 0)
342 return true;
343 }
344
345 return false;
346 }
347
348 static struct cgm_keys *get_cgroup_key(const char *contr, const char *dir, const char *f)
349 {
350 nih_local struct cgm_keys **list = NULL;
351 struct cgm_keys *k;
352 int i;
353
354 if (!f)
355 return NULL;
356 if (*f == '/')
357 f++;
358 if (!cgm_list_keys(contr, dir, &list))
359 return NULL;
360 for (i = 0; list[i]; i++) {
361 if (strcmp(list[i]->name, f) == 0) {
362 k = NIH_MUST( nih_alloc(NULL, (sizeof(*k))) );
363 k->name = NIH_MUST( nih_strdup(k, list[i]->name) );
364 k->uid = list[i]->uid;
365 k->gid = list[i]->gid;
366 k->mode = list[i]->mode;
367 return k;
368 }
369 }
370
371 return NULL;
372 }
373
/*
 * Split @cg at its last '/'.
 *
 * *dir receives a nih-allocated copy of @cg cut off at the last '/' (the
 * whole of @cg when it has no '/'). *file is set to point at that last '/'
 * inside @cg itself, or to NULL when there is none.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **file)
{
	char *copy = NIH_MUST( nih_strdup(NULL, cg) );
	char *slash = strrchr(cg, '/');

	*dir = copy;
	*file = slash;

	/* the last '/' sits at the same offset in the copy */
	if (slash != NULL)
		copy[slash - cg] = '\0';
}
387
388 static size_t get_file_size(const char *contrl, const char *cg, const char *f)
389 {
390 nih_local char *data = NULL;
391 size_t s;
392 if (!cgm_get_value(contrl, cg, f, &data))
393 return -EINVAL;
394 s = strlen(data);
395 return s;
396 }
397
398 /*
399 * FUSE ops for /cgroup
400 */
401
402 static int cg_getattr(const char *path, struct stat *sb)
403 {
404 struct timespec now;
405 struct fuse_context *fc = fuse_get_context();
406 nih_local char * cgdir = NULL;
407 char *fpath = NULL, *path1, *path2;
408 nih_local struct cgm_keys *k = NULL;
409 const char *cgroup;
410 nih_local char *controller = NULL;
411
412
413 if (!fc)
414 return -EIO;
415
416 memset(sb, 0, sizeof(struct stat));
417
418 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
419 return -EINVAL;
420
421 sb->st_uid = sb->st_gid = 0;
422 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
423 sb->st_size = 0;
424
425 if (strcmp(path, "/cgroup") == 0) {
426 sb->st_mode = S_IFDIR | 00755;
427 sb->st_nlink = 2;
428 return 0;
429 }
430
431 controller = pick_controller_from_path(fc, path);
432 if (!controller)
433 return -EIO;
434 cgroup = find_cgroup_in_path(path);
435 if (!cgroup) {
436 /* this is just /cgroup/controller, return it as a dir */
437 sb->st_mode = S_IFDIR | 00755;
438 sb->st_nlink = 2;
439 return 0;
440 }
441
442 get_cgdir_and_path(cgroup, &cgdir, &fpath);
443
444 if (!fpath) {
445 path1 = "/";
446 path2 = cgdir;
447 } else {
448 path1 = cgdir;
449 path2 = fpath;
450 }
451
452 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
453 * Then check that caller's cgroup is under path if fpath is a child
454 * cgroup, or cgdir if fpath is a file */
455
456 if (is_child_cgroup(controller, path1, path2)) {
457 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
458 /* this is just /cgroup/controller, return it as a dir */
459 sb->st_mode = S_IFDIR | 00555;
460 sb->st_nlink = 2;
461 return 0;
462 }
463 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
464 return -EACCES;
465
466 // get uid, gid, from '/tasks' file and make up a mode
467 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
468 sb->st_mode = S_IFDIR | 00755;
469 k = get_cgroup_key(controller, cgroup, "tasks");
470 if (!k) {
471 sb->st_uid = sb->st_gid = 0;
472 } else {
473 sb->st_uid = k->uid;
474 sb->st_gid = k->gid;
475 }
476 sb->st_nlink = 2;
477 return 0;
478 }
479
480 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
481 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL))
482 return -ENOENT;
483 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY))
484 return -EACCES;
485
486 sb->st_mode = S_IFREG | k->mode;
487 sb->st_nlink = 1;
488 sb->st_uid = k->uid;
489 sb->st_gid = k->gid;
490 sb->st_size = get_file_size(controller, path1, path2);
491 return 0;
492 }
493
494 return -ENOENT;
495 }
496
497 /*
498 * TODO - cache these results in a table for use in opendir, free
499 * in releasedir
500 */
501 static int cg_opendir(const char *path, struct fuse_file_info *fi)
502 {
503 struct fuse_context *fc = fuse_get_context();
504 nih_local struct cgm_keys **list = NULL;
505 const char *cgroup;
506 nih_local char *controller = NULL;
507 nih_local char *nextcg = NULL;
508
509 if (!fc)
510 return -EIO;
511
512 if (strcmp(path, "/cgroup") == 0)
513 return 0;
514
515 // return list of keys for the controller, and list of child cgroups
516 controller = pick_controller_from_path(fc, path);
517 if (!controller)
518 return -EIO;
519
520 cgroup = find_cgroup_in_path(path);
521 if (!cgroup) {
522 /* this is just /cgroup/controller, return its contents */
523 cgroup = "/";
524 }
525
526 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
527 return -EACCES;
528 return 0;
529 }
530
531 static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
532 struct fuse_file_info *fi)
533 {
534 struct fuse_context *fc = fuse_get_context();
535
536 if (!fc)
537 return -EIO;
538
539 if (strcmp(path, "/cgroup") == 0) {
540 // get list of controllers
541 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
542 int i;
543
544 if (!list)
545 return -EIO;
546
547 for (i = 0; list[i]; i++) {
548 if (filler(buf, list[i], NULL, 0) != 0) {
549 return -EIO;
550 }
551 }
552 return 0;
553 }
554
555 // return list of keys for the controller, and list of child cgroups
556 nih_local struct cgm_keys **list = NULL;
557 const char *cgroup;
558 nih_local char *controller = NULL;
559 int i;
560 nih_local char *nextcg = NULL;
561
562 controller = pick_controller_from_path(fc, path);
563 if (!controller)
564 return -EIO;
565
566 cgroup = find_cgroup_in_path(path);
567 if (!cgroup) {
568 /* this is just /cgroup/controller, return its contents */
569 cgroup = "/";
570 }
571
572 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
573 return -EACCES;
574
575 if (!cgm_list_keys(controller, cgroup, &list))
576 // not a valid cgroup
577 return -EINVAL;
578
579 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, &nextcg)) {
580 if (nextcg) {
581 int ret;
582 ret = filler(buf, nextcg, NULL, 0);
583 if (ret != 0)
584 return -EIO;
585 }
586 return 0;
587 }
588
589 for (i = 0; list[i]; i++) {
590 if (filler(buf, list[i]->name, NULL, 0) != 0) {
591 return -EIO;
592 }
593 }
594
595 // now get the list of child cgroups
596 nih_local char **clist;
597
598 if (!cgm_list_children(controller, cgroup, &clist))
599 return 0;
600 for (i = 0; clist[i]; i++) {
601 if (filler(buf, clist[i], NULL, 0) != 0) {
602 return -EIO;
603 }
604 }
605 return 0;
606 }
607
/* releasedir for /cgroup directories: nothing to release, always returns 0. */
static int cg_releasedir(const char *path, struct fuse_file_info *fi)
{
	(void) path;
	(void) fi;

	return 0;
}
612
613 /*
614 * TODO - cache info here for read/write, release in cg_release.
615 */
616 static int cg_open(const char *path, struct fuse_file_info *fi)
617 {
618 nih_local char *controller = NULL;
619 const char *cgroup;
620 char *fpath = NULL, *path1, *path2;
621 nih_local char * cgdir = NULL;
622 nih_local struct cgm_keys *k = NULL;
623 struct fuse_context *fc = fuse_get_context();
624
625 if (!fc)
626 return -EIO;
627
628 controller = pick_controller_from_path(fc, path);
629 if (!controller)
630 return -EIO;
631 cgroup = find_cgroup_in_path(path);
632 if (!cgroup)
633 return -EINVAL;
634
635 get_cgdir_and_path(cgroup, &cgdir, &fpath);
636 if (!fpath) {
637 path1 = "/";
638 path2 = cgdir;
639 } else {
640 path1 = cgdir;
641 path2 = fpath;
642 }
643
644 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
645 if (!fc_may_access(fc, controller, path1, path2, fi->flags))
646 // should never get here
647 return -EACCES;
648
649 return 0;
650 }
651
652 return -EINVAL;
653 }
654
655 static int cg_read(const char *path, char *buf, size_t size, off_t offset,
656 struct fuse_file_info *fi)
657 {
658 nih_local char *controller = NULL;
659 const char *cgroup;
660 char *fpath = NULL, *path1, *path2;
661 struct fuse_context *fc = fuse_get_context();
662 nih_local char * cgdir = NULL;
663 nih_local struct cgm_keys *k = NULL;
664
665 if (offset)
666 return -EIO;
667
668 if (!fc)
669 return -EIO;
670
671 controller = pick_controller_from_path(fc, path);
672 if (!controller)
673 return -EINVAL;
674 cgroup = find_cgroup_in_path(path);
675 if (!cgroup)
676 return -EINVAL;
677
678 get_cgdir_and_path(cgroup, &cgdir, &fpath);
679 if (!fpath) {
680 path1 = "/";
681 path2 = cgdir;
682 } else {
683 path1 = cgdir;
684 path2 = fpath;
685 }
686
687 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
688 nih_local char *data = NULL;
689 int s;
690
691 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY))
692 // should never get here
693 return -EACCES;
694
695 if (!cgm_get_value(controller, path1, path2, &data))
696 return -EINVAL;
697
698 s = strlen(data);
699 if (s > size)
700 s = size;
701 memcpy(buf, data, s);
702
703 return s;
704 }
705
706 return -EINVAL;
707 }
708
709 int cg_write(const char *path, const char *buf, size_t size, off_t offset,
710 struct fuse_file_info *fi)
711 {
712 nih_local char *controller = NULL;
713 const char *cgroup;
714 char *fpath = NULL, *path1, *path2;
715 struct fuse_context *fc = fuse_get_context();
716 nih_local char * cgdir = NULL;
717 nih_local struct cgm_keys *k = NULL;
718
719 if (offset)
720 return -EINVAL;
721
722 if (!fc)
723 return -EIO;
724
725 controller = pick_controller_from_path(fc, path);
726 if (!controller)
727 return -EINVAL;
728 cgroup = find_cgroup_in_path(path);
729 if (!cgroup)
730 return -EINVAL;
731
732 get_cgdir_and_path(cgroup, &cgdir, &fpath);
733 if (!fpath) {
734 path1 = "/";
735 path2 = cgdir;
736 } else {
737 path1 = cgdir;
738 path2 = fpath;
739 }
740
741 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
742 if (!fc_may_access(fc, controller, path1, path2, O_WRONLY))
743 return -EACCES;
744
745 if (!cgm_set_value(controller, path1, path2, buf))
746 return -EINVAL;
747
748 return size;
749 }
750
751 return -EINVAL;
752 }
753
754 int cg_chown(const char *path, uid_t uid, gid_t gid)
755 {
756 struct fuse_context *fc = fuse_get_context();
757 nih_local char * cgdir = NULL;
758 char *fpath = NULL, *path1, *path2;
759 nih_local struct cgm_keys *k = NULL;
760 const char *cgroup;
761 nih_local char *controller = NULL;
762
763
764 if (!fc)
765 return -EIO;
766
767 if (strcmp(path, "/cgroup") == 0)
768 return -EINVAL;
769
770 controller = pick_controller_from_path(fc, path);
771 if (!controller)
772 return -EINVAL;
773 cgroup = find_cgroup_in_path(path);
774 if (!cgroup)
775 /* this is just /cgroup/controller */
776 return -EINVAL;
777
778 get_cgdir_and_path(cgroup, &cgdir, &fpath);
779
780 if (!fpath) {
781 path1 = "/";
782 path2 = cgdir;
783 } else {
784 path1 = cgdir;
785 path2 = fpath;
786 }
787
788 if (is_child_cgroup(controller, path1, path2)) {
789 // get uid, gid, from '/tasks' file and make up a mode
790 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
791 k = get_cgroup_key(controller, cgroup, "tasks");
792
793 } else
794 k = get_cgroup_key(controller, path1, path2);
795
796 if (!k)
797 return -EINVAL;
798
799 /*
800 * This being a fuse request, the uid and gid must be valid
801 * in the caller's namespace. So we can just check to make
802 * sure that the caller is root in his uid, and privileged
803 * over the file's current owner.
804 */
805 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD))
806 return -EACCES;
807
808 if (!cgm_chown_file(controller, cgroup, uid, gid))
809 return -EINVAL;
810 return 0;
811 }
812
813 int cg_chmod(const char *path, mode_t mode)
814 {
815 struct fuse_context *fc = fuse_get_context();
816 nih_local char * cgdir = NULL;
817 char *fpath = NULL, *path1, *path2;
818 nih_local struct cgm_keys *k = NULL;
819 const char *cgroup;
820 nih_local char *controller = NULL;
821
822 if (!fc)
823 return -EIO;
824
825 if (strcmp(path, "/cgroup") == 0)
826 return -EINVAL;
827
828 controller = pick_controller_from_path(fc, path);
829 if (!controller)
830 return -EINVAL;
831 cgroup = find_cgroup_in_path(path);
832 if (!cgroup)
833 /* this is just /cgroup/controller */
834 return -EINVAL;
835
836 get_cgdir_and_path(cgroup, &cgdir, &fpath);
837
838 if (!fpath) {
839 path1 = "/";
840 path2 = cgdir;
841 } else {
842 path1 = cgdir;
843 path2 = fpath;
844 }
845
846 if (is_child_cgroup(controller, path1, path2)) {
847 // get uid, gid, from '/tasks' file and make up a mode
848 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
849 k = get_cgroup_key(controller, cgroup, "tasks");
850
851 } else
852 k = get_cgroup_key(controller, path1, path2);
853
854 if (!k)
855 return -EINVAL;
856
857 /*
858 * This being a fuse request, the uid and gid must be valid
859 * in the caller's namespace. So we can just check to make
860 * sure that the caller is root in his uid, and privileged
861 * over the file's current owner.
862 */
863 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT))
864 return -EPERM;
865
866 if (!cgm_chmod_file(controller, cgroup, mode))
867 return -EINVAL;
868 return 0;
869 }
870
871 int cg_mkdir(const char *path, mode_t mode)
872 {
873 struct fuse_context *fc = fuse_get_context();
874 nih_local struct cgm_keys **list = NULL;
875 char *fpath = NULL, *path1;
876 nih_local char * cgdir = NULL;
877 const char *cgroup;
878 nih_local char *controller = NULL;
879
880 if (!fc)
881 return -EIO;
882
883
884 controller = pick_controller_from_path(fc, path);
885 if (!controller)
886 return -EINVAL;
887
888 cgroup = find_cgroup_in_path(path);
889 if (!cgroup)
890 return -EINVAL;
891
892 get_cgdir_and_path(cgroup, &cgdir, &fpath);
893 if (!fpath)
894 path1 = "/";
895 else
896 path1 = cgdir;
897
898 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR))
899 return -EACCES;
900
901
902 if (!cgm_create(controller, cgroup, fc->uid, fc->gid))
903 return -EINVAL;
904
905 return 0;
906 }
907
908 static int cg_rmdir(const char *path)
909 {
910 struct fuse_context *fc = fuse_get_context();
911 nih_local struct cgm_keys **list = NULL;
912 char *fpath = NULL;
913 nih_local char * cgdir = NULL;
914 const char *cgroup;
915 nih_local char *controller = NULL;
916
917 if (!fc)
918 return -EIO;
919
920
921 controller = pick_controller_from_path(fc, path);
922 if (!controller)
923 return -EINVAL;
924
925 cgroup = find_cgroup_in_path(path);
926 if (!cgroup)
927 return -EINVAL;
928
929 get_cgdir_and_path(cgroup, &cgdir, &fpath);
930 if (!fpath)
931 return -EINVAL;
932
933 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY))
934 return -EACCES;
935
936 if (!cgm_remove(controller, cgroup))
937 return -EINVAL;
938
939 return 0;
940 }
941
/* Return true when @line begins with the string @pref. */
static bool startswith(const char *line, const char *pref)
{
	return strncmp(line, pref, strlen(pref)) == 0;
}
948
/*
 * Scan @memstat line by line for the first line starting with
 * "total_cache" and store the number which follows it, divided by 1024,
 * in *v. *v is 0 when there is no such line.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	static const char key[] = "total_cache";
	const size_t keylen = sizeof(key) - 1;
	char *cur = memstat;

	*v = 0;
	while (cur != NULL && *cur != '\0') {
		if (strncmp(cur, key, keylen) == 0) {
			sscanf(cur + keylen, "%lu", v);
			*v /= 1024;
			return;
		}

		/* move to the start of the following line, if any */
		cur = strchr(cur, '\n');
		if (cur != NULL)
			cur++;
	}
}
966
967 static char *get_pid_cgroup(pid_t pid, const char *contrl)
968 {
969 nih_local char *fnam = NULL;
970 FILE *f;
971 char *answer = NULL;
972 char *line = NULL;
973 size_t len = 0;
974
975 fnam = NIH_MUST( nih_sprintf(NULL, "/proc/%d/cgroup", pid) );
976 if (!(f = fopen(fnam, "r")))
977 return false;
978
979 while (getline(&line, &len, f) != -1) {
980 char *c1, *c2;
981 if (!line[0])
982 continue;
983 c1 = strchr(line, ':');
984 if (!c1)
985 goto out;
986 c1++;
987 c2 = strchr(c1, ':');
988 if (!c2)
989 goto out;
990 *c2 = '\0';
991 if (strcmp(c1, contrl) != 0)
992 continue;
993 c2++;
994 stripnewline(c2);
995 answer = NIH_MUST( nih_strdup(NULL, c2) );
996 goto out;
997 }
998
999 out:
1000 fclose(f);
1001 free(line);
1002 return answer;
1003 }
1004
1005 /*
1006 * FUSE ops for /proc
1007 */
1008
1009 static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1010 struct fuse_file_info *fi)
1011 {
1012 struct fuse_context *fc = fuse_get_context();
1013 nih_local char *cg = get_pid_cgroup(fc->pid, "memory");
1014 nih_local char *memlimit_str = NULL, *memusage_str = NULL, *memstat_str = NULL;
1015 unsigned long memlimit = 0, memusage = 0, cached = 0, hosttotal = 0;
1016 char *line = NULL;
1017 size_t linelen = 0, total_len = 0;
1018 FILE *f;
1019
1020 if (offset)
1021 return -EINVAL;
1022
1023 if (!cg)
1024 return 0;
1025
1026 if (!cgm_get_value("memory", cg, "memory.limit_in_bytes", &memlimit_str))
1027 return 0;
1028 if (!cgm_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
1029 return 0;
1030 if (!cgm_get_value("memory", cg, "memory.stat", &memstat_str))
1031 return 0;
1032 memlimit = strtoul(memlimit_str, NULL, 10);
1033 memusage = strtoul(memusage_str, NULL, 10);
1034 memlimit /= 1024;
1035 memusage /= 1024;
1036 get_mem_cached(memstat_str, &cached);
1037
1038 f = fopen("/proc/meminfo", "r");
1039 if (!f)
1040 return 0;
1041
1042 while (getline(&line, &linelen, f) != -1) {
1043 size_t l;
1044 char *printme, lbuf[100];
1045
1046 memset(lbuf, 0, 100);
1047 if (startswith(line, "MemTotal:")) {
1048 sscanf(line+14, "%lu", &hosttotal);
1049 if (hosttotal < memlimit)
1050 memlimit = hosttotal;
1051 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
1052 printme = lbuf;
1053 } else if (startswith(line, "MemFree:")) {
1054 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
1055 printme = lbuf;
1056 } else if (startswith(line, "MemAvailable:")) {
1057 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
1058 printme = lbuf;
1059 } else if (startswith(line, "Buffers:")) {
1060 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
1061 printme = lbuf;
1062 } else if (startswith(line, "Cached:")) {
1063 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
1064 printme = lbuf;
1065 } else if (startswith(line, "SwapCached:")) {
1066 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
1067 printme = lbuf;
1068 } else
1069 printme = line;
1070 l = snprintf(buf, size, "%s", printme);
1071 buf += l;
1072 size -= l;
1073 total_len += l;
1074 }
1075
1076 return total_len;
1077 }
1078
/*
 * Read the cpuset.cpus value of cgroup @cg.
 * Returns the answer in a nih_alloced string, or NULL when the value
 * cannot be read.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgm_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
1091
/*
 * Helper functions for cpu_in_cpuset
 */

/*
 * Return a pointer to the token following the next ',' found after the
 * first character of @c, or NULL when there is no further token.
 */
char *cpuset_nexttok(const char *c)
{
	char *r;

	/* nothing follows an empty token; c+1 would be past the terminator */
	if (*c == '\0')
		return NULL;

	r = strchr(c+1, ',');
	if (r)
		return r+1;
	return NULL;
}

/*
 * Parse the token at the start of @c as "a" or "a-b".
 * Returns the sscanf() result: 2 for a range (both *a and *b set), 1 for a
 * single number (only *a set), anything else for a malformed token.
 */
int cpuset_getrange(const char *c, int *a, int *b)
{
	return sscanf(c, "%d-%d", a, b);
}

/*
 * cpusets are in format "1,2-3,4"
 * iow, comma-delimited ranges
 *
 * Returns true when @cpu is named by one of the tokens of @cpuset, either
 * as a single cpu or inside a range. Every token is examined; tokens which
 * do not match or do not parse are skipped.
 */
static bool cpu_in_cpuset(int cpu, const char *cpuset)
{
	const char *c;

	for (c = cpuset; c; c = cpuset_nexttok(c)) {
		int a, b, ret;

		ret = cpuset_getrange(c, &a, &b);
		if (ret == 1 && cpu == a)	/* single cpu, e.g. "1" */
			return true;
		if (ret == 2 && cpu >= a && cpu <= b)	/* range, e.g. "2-3" */
			return true;
	}

	return false;
}
1133
/*
 * Return true when @line is a "processor : N" line and cpu N is part of
 * @cpuset.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1 &&
	       cpu_in_cpuset(cpu, cpuset);
}
1142
/*
 * check whether this is a "^processor" line in /proc/cpuinfo, i.e. whether
 * it starts with "processor", a ':' and a number.
 */
static bool is_processor_line(const char *line)
{
	int ignored;

	return sscanf(line, "processor : %d", &ignored) == 1;
}
1154
1155 static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
1156 struct fuse_file_info *fi)
1157 {
1158 struct fuse_context *fc = fuse_get_context();
1159 nih_local char *cg = get_pid_cgroup(fc->pid, "cpuset");
1160 nih_local char *cpuset = NULL;
1161 char *line = NULL;
1162 size_t linelen = 0, total_len = 0;
1163 bool am_printing = false;
1164 int curcpu = -1;
1165 FILE *f;
1166
1167 if (offset)
1168 return -EINVAL;
1169
1170 if (!cg)
1171 return 0;
1172
1173 cpuset = get_cpuset(cg);
1174 if (!cpuset)
1175 return 0;
1176
1177 f = fopen("/proc/cpuinfo", "r");
1178 if (!f)
1179 return 0;
1180
1181 while (getline(&line, &linelen, f) != -1) {
1182 size_t l;
1183 if (is_processor_line(line)) {
1184 am_printing = cpuline_in_cpuset(line, cpuset);
1185 if (am_printing) {
1186 curcpu ++;
1187 l = snprintf(buf, size, "processor : %d\n", curcpu);
1188 buf += l;
1189 size -= l;
1190 total_len += l;
1191 }
1192 continue;
1193 }
1194 if (am_printing) {
1195 l = snprintf(buf, size, "%s", line);
1196 buf += l;
1197 size -= l;
1198 total_len += l;
1199 }
1200 }
1201
1202 return total_len;
1203 }
1204
1205 static int proc_stat_read(char *buf, size_t size, off_t offset,
1206 struct fuse_file_info *fi)
1207 {
1208 struct fuse_context *fc = fuse_get_context();
1209 nih_local char *cg = get_pid_cgroup(fc->pid, "cpuset");
1210 nih_local char *cpuset = NULL;
1211 char *line = NULL;
1212 size_t linelen = 0, total_len = 0;
1213 int curcpu = 0;
1214 FILE *f;
1215
1216 if (offset)
1217 return -EINVAL;
1218
1219 if (!cg)
1220 return 0;
1221
1222 cpuset = get_cpuset(cg);
1223 if (!cpuset)
1224 return 0;
1225
1226 f = fopen("/proc/stat", "r");
1227 if (!f)
1228 return 0;
1229
1230 while (getline(&line, &linelen, f) != -1) {
1231 size_t l;
1232 int cpu;
1233 char *c;
1234
1235 if (sscanf(line, "cpu%d", &cpu) != 1) {
1236 /* not a ^cpu line, just print it */
1237 l = snprintf(buf, size, "%s", line);
1238 buf += l;
1239 size -= l;
1240 total_len += l;
1241 continue;
1242 }
1243 if (!cpu_in_cpuset(cpu, cpuset))
1244 continue;
1245 curcpu ++;
1246
1247 c = strchr(line, ' ');
1248 if (!c)
1249 continue;
1250 l = snprintf(buf, size, "cpu%d %s", curcpu, c);
1251 buf += l;
1252 size -= l;
1253 total_len += l;
1254 }
1255
1256 return total_len;
1257 }
1258
/* read for /proc/uptime: produces no data yet and always returns 0. */
static int proc_uptime_read(char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	(void) buf;
	(void) size;
	(void) offset;
	(void) fi;

	return 0;
}
1264
/*
 * Return the number of bytes that can be read from the file @which, or 0
 * when it cannot be opened.
 *
 * The file is read to its end and the bytes are counted. A fixed buffer on
 * the stack is used, so nothing is allocated and nothing needs freeing.
 */
static off_t get_procfile_size(const char *which)
{
	FILE *f = fopen(which, "r");
	char chunk[4096];
	size_t got;
	off_t answer = 0;

	if (!f)
		return 0;

	while ((got = fread(chunk, 1, sizeof(chunk), f)) > 0)
		answer += got;
	fclose (f);

	return answer;
}
1280
1281 static int proc_getattr(const char *path, struct stat *sb)
1282 {
1283 struct timespec now;
1284
1285 memset(sb, 0, sizeof(struct stat));
1286 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
1287 return -EINVAL;
1288 sb->st_uid = sb->st_gid = 0;
1289 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
1290 if (strcmp(path, "/proc") == 0) {
1291 sb->st_mode = S_IFDIR | 00555;
1292 sb->st_nlink = 2;
1293 return 0;
1294 }
1295 if (strcmp(path, "/proc/meminfo") == 0 ||
1296 strcmp(path, "/proc/cpuinfo") == 0 ||
1297 strcmp(path, "/proc/uptime") == 0 ||
1298 strcmp(path, "/proc/stat") == 0) {
1299
1300 sb->st_size = get_procfile_size(path);
1301 sb->st_mode = S_IFREG | 00444;
1302 sb->st_nlink = 1;
1303 return 0;
1304 }
1305
1306 return -ENOENT;
1307 }
1308
1309 static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
1310 struct fuse_file_info *fi)
1311 {
1312 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
1313 filler(buf, "meminfo", NULL, 0) != 0 ||
1314 filler(buf, "stat", NULL, 0) != 0 ||
1315 filler(buf, "uptime", NULL, 0) != 0)
1316 return -EINVAL;
1317 return 0;
1318 }
1319
/*
 * open handler for the /proc subtree.
 *
 * Returns 0 when @path is one of the four emulated proc files and
 * -ENOENT for anything else.  @fi is not used.
 */
static int proc_open(const char *path, struct fuse_file_info *fi)
{
	static const char *const openable[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
	};
	size_t i;

	for (i = 0; i < sizeof(openable) / sizeof(openable[0]); i++) {
		if (strcmp(path, openable[i]) == 0)
			return 0;
	}

	return -ENOENT;
}
1329
/*
 * read handler for the /proc subtree: forwards the request to the
 * per-file reader matching @path exactly.
 *
 * Returns whatever the selected reader returns, or -EINVAL when
 * @path is not one of the emulated proc files.
 */
static int proc_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	int ret = -EINVAL;

	if (!strcmp(path, "/proc/meminfo"))
		ret = proc_meminfo_read(buf, size, offset, fi);
	else if (!strcmp(path, "/proc/cpuinfo"))
		ret = proc_cpuinfo_read(buf, size, offset, fi);
	else if (!strcmp(path, "/proc/uptime"))
		ret = proc_uptime_read(buf, size, offset, fi);
	else if (!strcmp(path, "/proc/stat"))
		ret = proc_stat_read(buf, size, offset, fi);

	return ret;
}
1343
1344 /*
1345 * FUSE ops for /
1346 * these just delegate to the /proc and /cgroup ops as
1347 * needed
1348 */
1349
1350 static int lxcfs_getattr(const char *path, struct stat *sb)
1351 {
1352 if (strcmp(path, "/") == 0) {
1353 sb->st_mode = S_IFDIR | 00755;
1354 sb->st_nlink = 2;
1355 return 0;
1356 }
1357 if (strncmp(path, "/cgroup", 7) == 0) {
1358 return cg_getattr(path, sb);
1359 }
1360 if (strncmp(path, "/proc", 5) == 0) {
1361 return proc_getattr(path, sb);
1362 }
1363 return -EINVAL;
1364 }
1365
/*
 * Top-level opendir handler.
 *
 * Paths starting with "/cgroup" are passed to cg_opendir().  The root
 * and "/proc" are accepted without doing anything.  Every other path
 * yields -ENOENT.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_opendir(path, fi);

	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	return -ENOENT;
}
1378
1379 static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
1380 struct fuse_file_info *fi)
1381 {
1382 if (strcmp(path, "/") == 0) {
1383 if (filler(buf, "proc", NULL, 0) != 0 ||
1384 filler(buf, "cgroup", NULL, 0) != 0)
1385 return -EINVAL;
1386 return 0;
1387 }
1388 if (strncmp(path, "/cgroup", 7) == 0)
1389 return cg_readdir(path, buf, filler, offset, fi);
1390 if (strcmp(path, "/proc") == 0)
1391 return proc_readdir(path, buf, filler, offset, fi);
1392 return -EINVAL;
1393 }
1394
/*
 * Top-level releasedir handler.
 *
 * Paths starting with "/cgroup" are passed to cg_releasedir().  The
 * root and "/proc" need no work and return 0.  Every other path
 * yields -EINVAL.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_releasedir(path, fi);

	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	return -EINVAL;
}
1406
/*
 * Top-level open handler: routes by path prefix to cg_open() for
 * "/cgroup..." and to proc_open() for "/proc...".  Paths matching
 * neither prefix yield -EINVAL.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	const bool under_cgroup = strncmp(path, "/cgroup", 7) == 0;
	const bool under_proc = strncmp(path, "/proc", 5) == 0;

	if (under_cgroup)
		return cg_open(path, fi);
	if (under_proc)
		return proc_open(path, fi);

	return -EINVAL;
}
1416
/*
 * Top-level read handler: routes by path prefix to cg_read() for
 * "/cgroup..." and to proc_read() for "/proc...".  Paths matching
 * neither prefix yield -EINVAL.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	const bool under_cgroup = strncmp(path, "/cgroup", 7) == 0;
	const bool under_proc = strncmp(path, "/proc", 5) == 0;

	if (under_cgroup)
		return cg_read(path, buf, size, offset, fi);
	if (under_proc)
		return proc_read(path, buf, size, offset, fi);

	return -EINVAL;
}
1427
/*
 * Top-level write handler.  Only paths starting with "/cgroup" are
 * writable; they are passed to cg_write().  Everything else yields
 * -EINVAL.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
1437
/*
 * flush handler: does nothing and always returns 0.
 */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
1442
/*
 * release handler: does nothing and always returns 0.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
1447
/*
 * fsync handler: does nothing and always returns 0, whatever the
 * value of @datasync.
 */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
1452
/*
 * Top-level mkdir handler.  Directories can only be created below
 * "/cgroup", via cg_mkdir(); every other path yields -EINVAL.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
1460
/*
 * Top-level chown handler.  Ownership can only be changed below
 * "/cgroup", via cg_chown(); every other path yields -EINVAL.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
1468
/*
 * Top-level truncate handler.
 *
 * cat issues a truncate before it calls ops->write, which has no
 * sensible meaning for cgroup files.  So for anything below "/cgroup"
 * simply claim success without touching anything; all other paths
 * yield -EINVAL.  @newsize is ignored.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	const int under_cgroup = (strncmp(path, "/cgroup", 7) == 0);

	(void)newsize;

	return under_cgroup ? 0 : -EINVAL;
}
1480
/*
 * Top-level rmdir handler.  Directories can only be removed below
 * "/cgroup", via cg_rmdir(); every other path yields -EINVAL.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
1487
/*
 * Top-level chmod handler.  Modes can only be changed below
 * "/cgroup", via cg_chmod(); every other path yields -EINVAL.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
1494
1495 const struct fuse_operations lxcfs_ops = {
1496 .getattr = lxcfs_getattr,
1497 .readlink = NULL,
1498 .getdir = NULL,
1499 .mknod = NULL,
1500 .mkdir = lxcfs_mkdir,
1501 .unlink = NULL,
1502 .rmdir = lxcfs_rmdir,
1503 .symlink = NULL,
1504 .rename = NULL,
1505 .link = NULL,
1506 .chmod = lxcfs_chmod,
1507 .chown = lxcfs_chown,
1508 .truncate = lxcfs_truncate,
1509 .utime = NULL,
1510
1511 .open = lxcfs_open,
1512 .read = lxcfs_read,
1513 .release = lxcfs_release,
1514 .write = lxcfs_write,
1515
1516 .statfs = NULL,
1517 .flush = lxcfs_flush,
1518 .fsync = lxcfs_fsync,
1519
1520 .setxattr = NULL,
1521 .getxattr = NULL,
1522 .listxattr = NULL,
1523 .removexattr = NULL,
1524
1525 .opendir = lxcfs_opendir,
1526 .readdir = lxcfs_readdir,
1527 .releasedir = lxcfs_releasedir,
1528
1529 .fsyncdir = NULL,
1530 .init = NULL,
1531 .destroy = NULL,
1532 .access = NULL,
1533 .create = NULL,
1534 .ftruncate = NULL,
1535 .fgetattr = NULL,
1536 };
1537
/*
 * Print the command-line synopsis to stderr, using @me as the program
 * name, then terminate the process with exit status 1.
 */
static void usage(const char *me)
{
	fputs("Usage:\n", stderr);
	fputs("\n", stderr);
	fprintf(stderr, "%s [FUSE and mount options] mountpoint\n", me);
	exit(1);
}
1545
/*
 * Return true if @w is one of the recognised spellings of a help
 * request ("-h", "--help", "-help" or "help"), false otherwise.
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = {
		"-h", "--help", "-help", "help",
	};
	size_t i;

	for (i = 0; i < sizeof(spellings) / sizeof(spellings[0]); i++) {
		if (strcmp(w, spellings[i]) == 0)
			return true;
	}

	return false;
}
1555
1556 int main(int argc, char *argv[])
1557 {
1558 int ret;
1559 struct lxcfs_state *d;
1560
1561 if (argc < 2 || is_help(argv[1]))
1562 usage(argv[0]);
1563
1564 d = malloc(sizeof(*d));
1565 if (!d)
1566 return -1;
1567
1568 if (!cgm_escape_cgroup())
1569 fprintf(stderr, "WARNING: failed to escape to root cgroup\n");
1570
1571 if (!cgm_get_controllers(&d->subsystems))
1572 return -1;
1573
1574 ret = fuse_main(argc, argv, &lxcfs_ops, d);
1575
1576 return ret;
1577 }