]> git.proxmox.com Git - mirror_lxcfs.git/blob - lxcfs.c
Implement SwapTotal and SwapFree support for /proc/meminfo
[mirror_lxcfs.git] / lxcfs.c
1 /* lxcfs
2 *
3 * Copyright © 2014,2015 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
6 * See COPYING file for details.
7 */
8
9 /*
10 * TODO XXX
11 * sanitize paths for '..', cgmanager's not doing that for us any more
12 * does fuse help us?
13 * Surely there are more paths we'll need to sanitize - look back through
14 * cgmanager's sources.
15 */
16
17 #define FUSE_USE_VERSION 26
18
19 #include <stdio.h>
20 #include <dirent.h>
21 #include <fcntl.h>
22 #include <fuse.h>
23 #include <unistd.h>
24 #include <errno.h>
25 #include <stdbool.h>
26 #include <time.h>
27 #include <string.h>
28 #include <stdlib.h>
29 #include <libgen.h>
30 #include <sched.h>
31 #include <linux/sched.h>
32 #include <sys/socket.h>
33 #include <sys/mount.h>
34 #include <wait.h>
35
36 #ifdef FORTRAVIS
37 #define GLIB_DISABLE_DEPRECATION_WARNINGS
38 #include <glib-object.h>
39 #endif
40
41 #include "cgfs.h"
42 #include "config.h" // for VERSION
43
/*
 * Discriminator stored in struct file_info.type.
 * cg_opendir() tags its state LXC_TYPE_CGDIR and cg_open() tags its state
 * LXC_TYPE_CGFILE; the LXC_TYPE_PROC_* values are presumably used by the
 * /proc handlers elsewhere in this file.
 */
enum {
	LXC_TYPE_CGDIR = 0,
	LXC_TYPE_CGFILE = 1,
	LXC_TYPE_PROC_MEMINFO = 2,
	LXC_TYPE_PROC_CPUINFO = 3,
	LXC_TYPE_PROC_UPTIME = 4,
	LXC_TYPE_PROC_STAT = 5,
	LXC_TYPE_PROC_DISKSTATS = 6,
};
53
/*
 * Per-open-handle state; a pointer to it is stored in fuse_file_info.fh by
 * the open handlers and released again by do_release_file_info().
 */
struct file_info {
	char *controller;	/* heap copy of the controller name, may be NULL */
	char *cgroup;		/* heap copy of the cgroup path, may be NULL */
	char *file;		/* heap copy of the file (key) name, NULL for directories */
	int type;		/* one of the LXC_TYPE_* constants */
	char *buf;		/* unused as of yet */
	int buflen;		/* length associated with buf */
	int size;		/* actual data size */
	int cached;
};
64
/* reserve buffer size, for cpuall in /proc/stat */
#define BUF_RESERVE_SIZE 256

/*
 * Append "<pid>\n" to the heap string *src, growing it when needed.
 *
 * src: pointer to the destination buffer.  *src may be NULL, in which case
 *      a fresh buffer is allocated (callers pass *sz == 0 in that case).
 * sz:  number of characters stored so far, not counting the trailing \0;
 *      updated on return.
 * asz: number of bytes currently allocated for *src; updated on return.
 * pid: the pid to append.
 *
 * Allocation is retried until it succeeds, so the function cannot fail.
 */
static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
{
	char *d = *src;
	char tmp[30];
	size_t len;

	snprintf(tmp, sizeof(tmp), "%d\n", (int)pid);
	len = strlen(tmp);

	if (!d) {
		do {
			d = malloc(BUF_RESERVE_SIZE);
		} while (!d);
		*src = d;
		*asz = BUF_RESERVE_SIZE;
	} else if (len + *sz + 1 >= *asz) {
		/*
		 * Compare the sizes, not the addresses of the size variables.
		 * Keep the old pointer until realloc succeeds so a failed
		 * attempt does not drop the data gathered so far.
		 */
		char *grown;

		do {
			grown = realloc(d, *asz + BUF_RESERVE_SIZE);
		} while (!grown);
		d = grown;
		*src = d;
		*asz += BUF_RESERVE_SIZE;
	}
	memcpy(d + *sz, tmp, len);
	*sz += len;
	d[*sz] = '\0';
}
99
/*
 * Reap child `pid`, retrying when waitpid() is interrupted by a signal or
 * reports some other pid.
 * Returns 0 when the child exited normally with status 0, -1 otherwise
 * (including when waitpid() itself fails).
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}
		if (reaped == pid)
			break;
	}

	return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
}
117
/*
 * Translate an id through an id map.
 *
 * idfile: an open FILE * on /proc/pid/{u,g}id_map; it is rewound first.
 * in_id:  an id valid in the caller's namespace.
 *
 * Every line that parses as "<ns base> <host base> <count>" describes one
 * range; lines that do not parse are skipped.
 * Returns the id mapped into pid's namespace, or -1 (as unsigned int) when
 * no range contains in_id or a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char line[400];
	unsigned int ns_base, host_base, range;

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, sizeof(line), idfile) != NULL) {
		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range) != 3)
			continue;

		if (host_base + range < host_base || ns_base + range < ns_base) {
			/*
			 * The range wrapped around.  That is unexpected for a
			 * proc file, so give up.
			 */
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				ns_base, host_base, range, line);
			return -1;
		}

		if (in_id < host_base || in_id >= host_base + range)
			continue;

		/*
		 * host_base <= in_id < host_base + range and neither range
		 * wraps, so ns_base + (in_id - host_base) is below
		 * ns_base + range and cannot wrap either.
		 */
		return ns_base + (in_id - host_base);
	}

	/* no answer found */
	return -1;
}
161
/*
 * for is_privileged_over,
 * specify whether we require the calling uid to be root in his
 * namespace
 */
#define NS_ROOT_REQD true
#define NS_ROOT_OPT false

#define PROCLEN 100

/*
 * Decide whether `uid`, seen through the uid map of process `pid`, is
 * privileged over `victim`.
 *
 * Returns false when either id is -1.  When req_ns_root is false, an equal
 * uid is sufficient (uid 1000 may write files owned by uid 1000).
 * Otherwise uid must map to 0 in /proc/<pid>/uid_map and victim must be
 * mapped there at all.  Any failure to build or open the map path yields
 * false.
 */
static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
{
	char map_path[PROCLEN];
	FILE *map;
	bool privileged;
	int len;

	if (uid == -1 || victim == -1)
		return false;

	if (uid == victim && !req_ns_root)
		return true;

	len = snprintf(map_path, PROCLEN, "/proc/%d/uid_map", pid);
	if (len < 0 || len >= PROCLEN)
		return false;

	map = fopen(map_path, "r");
	if (!map)
		return false;

	/*
	 * The caller must be root in his namespace, and the victim must be
	 * mapped into that namespace.  The second lookup only happens when
	 * the first one succeeded.
	 */
	privileged = convert_id_to_ns(map, uid) == 0 &&
		     convert_id_to_ns(map, victim) != -1;

	fclose(map);
	return privileged;
}
217
/*
 * Check whether the "other" permission bits of fmode grant the access asked
 * for by req_mode (an O_RDONLY / O_WRONLY / O_RDWR open mode).  Callers
 * shift owner or group bits down into the "other" position beforehand.
 * Returns false for an access mode that is none of the three.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	const mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
237
/*
 * Return a newly allocated copy of the first path component of taskcg that
 * follows the prefix querycg (e.g. taskcg "/a/b/c", querycg "/a" -> "b").
 * Returns NULL when taskcg is not longer than querycg or on allocation
 * failure.  The caller frees the result.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t skip;
	char *next, *slash;

	if (strlen(taskcg) <= strlen(querycg)) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* step over the query prefix and the '/' that follows it */
	skip = (strcmp(querycg, "/") == 0) ? 1 : strlen(querycg) + 1;

	next = strdup(taskcg + skip);
	if (!next)
		return NULL;

	slash = strchr(next, '/');
	if (slash)
		*slash = '\0';
	return next;
}
258
/* Remove one trailing newline from x, in place, if there is one. */
static void stripnewline(char *x)
{
	char *last;

	if (*x == '\0')
		return;

	last = x + strlen(x) - 1;
	if (*last == '\n')
		*last = '\0';
}
265
266 static char *get_pid_cgroup(pid_t pid, const char *contrl)
267 {
268 char fnam[PROCLEN];
269 FILE *f;
270 char *answer = NULL;
271 char *line = NULL;
272 size_t len = 0;
273 int ret;
274 const char *h = find_mounted_controller(contrl);
275 if (!h)
276 return NULL;
277
278 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
279 if (ret < 0 || ret >= PROCLEN)
280 return NULL;
281 if (!(f = fopen(fnam, "r")))
282 return NULL;
283
284 while (getline(&line, &len, f) != -1) {
285 char *c1, *c2;
286 if (!line[0])
287 continue;
288 c1 = strchr(line, ':');
289 if (!c1)
290 goto out;
291 c1++;
292 c2 = strchr(c1, ':');
293 if (!c2)
294 goto out;
295 *c2 = '\0';
296 if (strcmp(c1, h) != 0)
297 continue;
298 c2++;
299 stripnewline(c2);
300 do {
301 answer = strdup(c2);
302 } while (!answer);
303 break;
304 }
305
306 out:
307 fclose(f);
308 free(line);
309 return answer;
310 }
311
312 /*
313 * check whether a fuse context may access a cgroup dir or file
314 *
315 * If file is not null, it is a cgroup file to check under cg.
316 * If file is null, then we are checking perms on cg itself.
317 *
318 * For files we can check the mode of the list_keys result.
319 * For cgroups, we must make assumptions based on the files under the
320 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
321 * yet.
322 */
323 static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
324 {
325 struct cgfs_files *k = NULL;
326 bool ret = false;
327
328 if (!file)
329 file = "tasks";
330
331 if (*file == '/')
332 file++;
333
334 k = cgfs_get_key(contrl, cg, file);
335 if (!k)
336 return false;
337
338 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
339 if (perms_include(k->mode >> 6, mode)) {
340 ret = true;
341 goto out;
342 }
343 }
344 if (fc->gid == k->gid) {
345 if (perms_include(k->mode >> 3, mode)) {
346 ret = true;
347 goto out;
348 }
349 }
350 ret = perms_include(k->mode, mode);
351
352 out:
353 free_key(k);
354 return ret;
355 }
356
#define INITSCOPE "/init.scope"
/*
 * Strip a trailing "/init.scope" from cg, in place.
 * A cg that is exactly "/init.scope" becomes "/"; a longer path simply
 * loses the suffix.  Strings that do not end in "/init.scope" (including
 * ones shorter than it) are left untouched.
 */
static void prune_init_slice(char *cg)
{
	size_t cg_len = strlen(cg);
	size_t scope_len = strlen(INITSCOPE);
	char *point;

	/*
	 * Compare lengths before doing pointer arithmetic: computing an
	 * address in front of cg would be undefined behaviour.
	 */
	if (cg_len < scope_len)
		return;

	point = cg + (cg_len - scope_len);
	if (strcmp(point, INITSCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';	/* keep the leading '/' */
	else
		*point = '\0';
}
371
372 /*
373 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
374 * If caller is in /a, he may act on /a/b, but not on /b.
375 * if the answer is false and nextcg is not NULL, then *nextcg will point
376 * to a string containing the next cgroup directory under cg, which must be
377 * freed by the caller.
378 */
379 static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
380 {
381 char fnam[PROCLEN];
382 FILE *f;
383 bool answer = false;
384 char *line = NULL;
385 size_t len = 0;
386 int ret;
387
388 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
389 if (ret < 0 || ret >= PROCLEN)
390 return false;
391 if (!(f = fopen(fnam, "r")))
392 return false;
393
394 while (getline(&line, &len, f) != -1) {
395 char *c1, *c2, *linecmp;
396 if (!line[0])
397 continue;
398 c1 = strchr(line, ':');
399 if (!c1)
400 goto out;
401 c1++;
402 c2 = strchr(c1, ':');
403 if (!c2)
404 goto out;
405 *c2 = '\0';
406 if (strcmp(c1, contrl) != 0)
407 continue;
408 c2++;
409 stripnewline(c2);
410 prune_init_slice(c2);
411 /*
412 * callers pass in '/' for root cgroup, otherwise they pass
413 * in a cgroup without leading '/'
414 */
415 linecmp = *cg == '/' ? c2 : c2+1;
416 if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
417 if (nextcg)
418 *nextcg = get_next_cgroup_dir(linecmp, cg);
419 goto out;
420 }
421 answer = true;
422 goto out;
423 }
424
425 out:
426 fclose(f);
427 free(line);
428 return answer;
429 }
430
431 /*
432 * given /cgroup/freezer/a/b, return "freezer".
433 * the returned char* should NOT be freed.
434 */
435 static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
436 {
437 const char *p1;
438 char *contr, *slash;
439
440 if (strlen(path) < 9)
441 return NULL;
442 if (*(path+7) != '/')
443 return NULL;
444 p1 = path+8;
445 contr = strdupa(p1);
446 if (!contr)
447 return NULL;
448 slash = strstr(contr, "/");
449 if (slash)
450 *slash = '\0';
451
452 int i;
453 for (i = 0; i < num_hierarchies; i++) {
454 if (hierarchies[i] && strcmp(hierarchies[i], contr) == 0)
455 return hierarchies[i];
456 }
457 return NULL;
458 }
459
/*
 * Find the start of cgroup in /cgroup/controller/the/cgroup/path.
 * Returns a pointer into path just past the first '/' found at or after
 * offset 8, or NULL when path is shorter than 9 characters or has no such
 * '/'.  Note that the returned value may include files (keynames) etc.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *slash;

	if (strlen(path) < 9)
		return NULL;

	slash = strchr(path + 8, '/');
	return slash ? slash + 1 : NULL;
}
475
/*
 * Split cg at its last '/'.
 *
 * *dir receives a heap copy of cg cut off at the last '/' (the whole of cg
 * when it has none) and should be freed.  *file points into cg itself at
 * that last '/' (so it includes the slash) or is NULL when there is none;
 * it must not be freed.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **file)
{
	char *copy, *last;

	do {
		copy = strdup(cg);
	} while (!copy);
	*dir = copy;

	last = strrchr(cg, '/');
	*file = last;
	if (last)
		copy[last - cg] = '\0';
}
494
495 /*
496 * FUSE ops for /cgroup
497 */
498
499 static int cg_getattr(const char *path, struct stat *sb)
500 {
501 struct timespec now;
502 struct fuse_context *fc = fuse_get_context();
503 char * cgdir = NULL;
504 char *fpath = NULL, *path1, *path2;
505 struct cgfs_files *k = NULL;
506 const char *cgroup;
507 const char *controller = NULL;
508 int ret = -ENOENT;
509
510
511 if (!fc)
512 return -EIO;
513
514 memset(sb, 0, sizeof(struct stat));
515
516 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
517 return -EINVAL;
518
519 sb->st_uid = sb->st_gid = 0;
520 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
521 sb->st_size = 0;
522
523 if (strcmp(path, "/cgroup") == 0) {
524 sb->st_mode = S_IFDIR | 00755;
525 sb->st_nlink = 2;
526 return 0;
527 }
528
529 controller = pick_controller_from_path(fc, path);
530 if (!controller)
531 return -EIO;
532 cgroup = find_cgroup_in_path(path);
533 if (!cgroup) {
534 /* this is just /cgroup/controller, return it as a dir */
535 sb->st_mode = S_IFDIR | 00755;
536 sb->st_nlink = 2;
537 return 0;
538 }
539
540 get_cgdir_and_path(cgroup, &cgdir, &fpath);
541
542 if (!fpath) {
543 path1 = "/";
544 path2 = cgdir;
545 } else {
546 path1 = cgdir;
547 path2 = fpath;
548 }
549
550 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
551 * Then check that caller's cgroup is under path if fpath is a child
552 * cgroup, or cgdir if fpath is a file */
553
554 if (is_child_cgroup(controller, path1, path2)) {
555 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
556 /* this is just /cgroup/controller, return it as a dir */
557 sb->st_mode = S_IFDIR | 00555;
558 sb->st_nlink = 2;
559 ret = 0;
560 goto out;
561 }
562 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
563 ret = -EACCES;
564 goto out;
565 }
566
567 // get uid, gid, from '/tasks' file and make up a mode
568 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
569 sb->st_mode = S_IFDIR | 00755;
570 k = cgfs_get_key(controller, cgroup, "tasks");
571 if (!k) {
572 sb->st_uid = sb->st_gid = 0;
573 } else {
574 sb->st_uid = k->uid;
575 sb->st_gid = k->gid;
576 }
577 free_key(k);
578 sb->st_nlink = 2;
579 ret = 0;
580 goto out;
581 }
582
583 if ((k = cgfs_get_key(controller, path1, path2)) != NULL) {
584 sb->st_mode = S_IFREG | k->mode;
585 sb->st_nlink = 1;
586 sb->st_uid = k->uid;
587 sb->st_gid = k->gid;
588 sb->st_size = 0;
589 free_key(k);
590 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
591 ret = -ENOENT;
592 goto out;
593 }
594 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) {
595 ret = -EACCES;
596 goto out;
597 }
598
599 ret = 0;
600 }
601
602 out:
603 free(cgdir);
604 return ret;
605 }
606
607 static int cg_opendir(const char *path, struct fuse_file_info *fi)
608 {
609 struct fuse_context *fc = fuse_get_context();
610 const char *cgroup;
611 struct file_info *dir_info;
612 char *controller = NULL;
613
614 if (!fc)
615 return -EIO;
616
617 if (strcmp(path, "/cgroup") == 0) {
618 cgroup = NULL;
619 controller = NULL;
620 } else {
621 // return list of keys for the controller, and list of child cgroups
622 controller = pick_controller_from_path(fc, path);
623 if (!controller)
624 return -EIO;
625
626 cgroup = find_cgroup_in_path(path);
627 if (!cgroup) {
628 /* this is just /cgroup/controller, return its contents */
629 cgroup = "/";
630 }
631 }
632
633 if (cgroup && !fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
634 return -EACCES;
635 }
636
637 /* we'll free this at cg_releasedir */
638 dir_info = malloc(sizeof(*dir_info));
639 if (!dir_info)
640 return -ENOMEM;
641 dir_info->controller = must_copy_string(controller);
642 dir_info->cgroup = must_copy_string(cgroup);
643 dir_info->type = LXC_TYPE_CGDIR;
644 dir_info->buf = NULL;
645 dir_info->file = NULL;
646 dir_info->buflen = 0;
647
648 fi->fh = (unsigned long)dir_info;
649 return 0;
650 }
651
652 static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
653 struct fuse_file_info *fi)
654 {
655 struct file_info *d = (struct file_info *)fi->fh;
656 struct cgfs_files **list = NULL;
657 int i, ret;
658 char *nextcg = NULL;
659 struct fuse_context *fc = fuse_get_context();
660 char **clist = NULL;
661
662 if (d->type != LXC_TYPE_CGDIR) {
663 fprintf(stderr, "Internal error: file cache info used in readdir\n");
664 return -EIO;
665 }
666 if (!d->cgroup && !d->controller) {
667 // ls /var/lib/lxcfs/cgroup - just show list of controllers
668 int i;
669
670 for (i = 0; i < num_hierarchies; i++) {
671 if (hierarchies[i] && filler(buf, hierarchies[i], NULL, 0) != 0) {
672 return -EIO;
673 }
674 }
675 return 0;
676 }
677
678 if (!cgfs_list_keys(d->controller, d->cgroup, &list)) {
679 // not a valid cgroup
680 ret = -EINVAL;
681 goto out;
682 }
683
684 if (!caller_is_in_ancestor(fc->pid, d->controller, d->cgroup, &nextcg)) {
685 if (nextcg) {
686 int ret;
687 ret = filler(buf, nextcg, NULL, 0);
688 free(nextcg);
689 if (ret != 0) {
690 ret = -EIO;
691 goto out;
692 }
693 }
694 ret = 0;
695 goto out;
696 }
697
698 for (i = 0; list[i]; i++) {
699 if (filler(buf, list[i]->name, NULL, 0) != 0) {
700 ret = -EIO;
701 goto out;
702 }
703 }
704
705 // now get the list of child cgroups
706
707 if (!cgfs_list_children(d->controller, d->cgroup, &clist)) {
708 ret = 0;
709 goto out;
710 }
711 for (i = 0; clist[i]; i++) {
712 if (filler(buf, clist[i], NULL, 0) != 0) {
713 ret = -EIO;
714 goto out;
715 }
716 }
717 ret = 0;
718
719 out:
720 free_keys(list);
721 if (clist) {
722 for (i = 0; clist[i]; i++)
723 free(clist[i]);
724 free(clist);
725 }
726 return ret;
727 }
728
729 static void do_release_file_info(struct file_info *f)
730 {
731 if (!f)
732 return;
733 free(f->controller);
734 free(f->cgroup);
735 free(f->file);
736 free(f->buf);
737 free(f);
738 }
739
740 static int cg_releasedir(const char *path, struct fuse_file_info *fi)
741 {
742 struct file_info *d = (struct file_info *)fi->fh;
743
744 do_release_file_info(d);
745 return 0;
746 }
747
748 static int cg_open(const char *path, struct fuse_file_info *fi)
749 {
750 const char *cgroup;
751 char *fpath = NULL, *path1, *path2, * cgdir = NULL, *controller;
752 struct cgfs_files *k = NULL;
753 struct file_info *file_info;
754 struct fuse_context *fc = fuse_get_context();
755 int ret;
756
757 if (!fc)
758 return -EIO;
759
760 controller = pick_controller_from_path(fc, path);
761 if (!controller)
762 return -EIO;
763 cgroup = find_cgroup_in_path(path);
764 if (!cgroup)
765 return -EINVAL;
766
767 get_cgdir_and_path(cgroup, &cgdir, &fpath);
768 if (!fpath) {
769 path1 = "/";
770 path2 = cgdir;
771 } else {
772 path1 = cgdir;
773 path2 = fpath;
774 }
775
776 k = cgfs_get_key(controller, path1, path2);
777 if (!k) {
778 ret = -EINVAL;
779 goto out;
780 }
781 free_key(k);
782
783 if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
784 // should never get here
785 ret = -EACCES;
786 goto out;
787 }
788
789 /* we'll free this at cg_release */
790 file_info = malloc(sizeof(*file_info));
791 if (!file_info) {
792 ret = -ENOMEM;
793 goto out;
794 }
795 file_info->controller = must_copy_string(controller);
796 file_info->cgroup = must_copy_string(path1);
797 file_info->file = must_copy_string(path2);
798 file_info->type = LXC_TYPE_CGFILE;
799 file_info->buf = NULL;
800 file_info->buflen = 0;
801
802 fi->fh = (unsigned long)file_info;
803 ret = 0;
804
805 out:
806 free(cgdir);
807 return ret;
808 }
809
810 static int cg_release(const char *path, struct fuse_file_info *fi)
811 {
812 struct file_info *f = (struct file_info *)fi->fh;
813
814 do_release_file_info(f);
815 return 0;
816 }
817
/*
 * Receive up to len bytes from sockfd into buf, waiting at most two
 * seconds for data to become readable.
 * Returns -1 when select() times out or fails, otherwise whatever the
 * non-blocking recv() returns.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	struct timeval timeout = { .tv_sec = 2, .tv_usec = 0 };
	fd_set readable;

	FD_ZERO(&readable);
	FD_SET(sockfd, &readable);

	if (select(sockfd + 1, &readable, NULL, NULL, &timeout) <= 0)
		return -1;

	return recv(sockfd, buf, len, MSG_DONTWAIT);
}
832
#define SEND_CREDS_OK 0
#define SEND_CREDS_NOTSK 1
#define SEND_CREDS_FAIL 2
/*
 * Send the one-byte payload v over sock together with *cred as an
 * SCM_CREDENTIALS control message.
 *
 * pingfirst: when true, first wait (via msgrecv) for a one-byte message
 *            from the peer before sending.
 *
 * Returns SEND_CREDS_OK on success, SEND_CREDS_NOTSK when sendmsg() fails
 * with ESRCH, and SEND_CREDS_FAIL on any other failure, including a
 * missing ping.  errno from the failed sendmsg() is left intact for the
 * caller.
 */
static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
{
	struct msghdr msg = { 0 };
	struct iovec iov;
	struct cmsghdr *cmsg;
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1];
	buf[0] = 'p';

	if (pingfirst) {
		if (msgrecv(sock, buf, 1) != 1) {
			fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
				  __func__);
			return SEND_CREDS_FAIL;
		}
	}

	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_CREDENTIALS;
	memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));

	msg.msg_name = NULL;
	msg.msg_namelen = 0;

	buf[0] = v;
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (sendmsg(sock, &msg, 0) < 0) {
		/* capture errno first: fprintf()/strerror() may overwrite it */
		int saved_errno = errno;

		fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
			 strerror(saved_errno));
		errno = saved_errno;
		if (saved_errno == ESRCH)
			return SEND_CREDS_NOTSK;
		return SEND_CREDS_FAIL;
	}

	return SEND_CREDS_OK;
}
881
/*
 * Receive one byte plus SCM_CREDENTIALS from sock.
 *
 * SO_PASSCRED is enabled on sock, a '1' byte is written to the peer to tell
 * it to go ahead, and then up to two seconds are spent waiting for its
 * message.  *cred is preset to pid/uid/gid -1 and only overwritten when a
 * well-formed credentials control message arrives; *v is preset to '1' and
 * set to the received payload byte on success.
 * Returns false when setsockopt, write, select or recvmsg fails.
 */
static bool recv_creds(int sock, struct ucred *cred, char *v)
{
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;
	struct timeval tv = { .tv_sec = 2, .tv_usec = 0 };
	fd_set rfds;
	int optval = 1;

	*v = '1';

	cred->pid = -1;
	cred->uid = -1;
	cred->gid = -1;

	if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
		fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
		return false;
	}

	buf[0] = '1';
	if (write(sock, buf, 1) != 1) {
		fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
		return false;
	}

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	FD_ZERO(&rfds);
	FD_SET(sock, &rfds);
	if (select(sock + 1, &rfds, NULL, NULL, &tv) <= 0) {
		fprintf(stderr, "Failed to select for scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	if (recvmsg(sock, &msg, MSG_DONTWAIT) < 0) {
		fprintf(stderr, "Failed to receive scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg &&
	    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
	    cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_CREDENTIALS)
		memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));

	*v = buf[0];
	return true;
}
947
948
/*
 * pid_to_ns - reads pids from a ucred over a socket, then writes the
 * int value back over the socket.  This shifts the pid from the
 * sender's pidns into tpid's pidns.
 *
 * Never returns: exits 0 when the peer sends a '1' payload or when
 * recv_creds() fails, and exits 1 when writing a pid back fails.  tpid is
 * not used here.
 */
static void pid_to_ns(int sock, pid_t tpid)
{
	struct ucred cred;
	char v = '0';

	for (;;) {
		if (!recv_creds(sock, &cred, &v))
			break;
		if (v == '1')
			_exit(0);
		if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
			_exit(1);
	}
	_exit(0);
}
967
/*
 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
 * in your old pidns.  Only children which you fork will be in the target
 * pidns.  So the pid_to_ns_wrapper does the setns, then forks a child to
 * actually convert pids.
 *
 * sock: socket handed on to pid_to_ns() in the forked child.
 * tpid: process whose /proc/<tpid>/ns/pid namespace is entered.
 *
 * A child that does not acknowledge its start over a pipe within one
 * second is killed and replaced.  Never returns: exits 0 when the
 * converting child exited cleanly and 1 on any failure.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	struct timeval tv;
	fd_set s;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

	for (;;) {
		cpid = fork();
		if (cpid < 0)
			_exit(1);

		if (!cpid) {
			char b = '1';
			close(cpipe[0]);
			if (write(cpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(cpipe[1]);
			pid_to_ns(sock, tpid);	/* does not return */
		}

		/* give the child 1 second to be done forking and write its ack */
		FD_ZERO(&s);
		FD_SET(cpipe[0], &s);
		tv.tv_sec = 1;
		tv.tv_usec = 0;
		ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
		if (ret > 0) {
			ret = read(cpipe[0], &v, 1);
			if (ret == 1 && v == '1')
				break;
		}

		/* no (valid) ack: get rid of this child and try again */
		kill(cpid, SIGKILL);
		wait_for_pid(cpid);
	}

	/* wait_for_pid() returns 0 on success, so non-zero is the failure case */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);
}
1034
1035 /*
1036 * To read cgroup files with a particular pid, we will setns into the child
1037 * pidns, open a pipe, fork a child - which will be the first to really be in
1038 * the child ns - which does the cgfs_get_value and writes the data to the pipe.
1039 */
1040 static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
1041 {
1042 int sock[2] = {-1, -1};
1043 char *tmpdata = NULL;
1044 int ret;
1045 pid_t qpid, cpid = -1;
1046 bool answer = false;
1047 char v = '0';
1048 struct ucred cred;
1049 struct timeval tv;
1050 size_t sz = 0, asz = 0;
1051 fd_set s;
1052
1053 if (!cgfs_get_value(contrl, cg, file, &tmpdata))
1054 return false;
1055
1056 /*
1057 * Now we read the pids from returned data one by one, pass
1058 * them into a child in the target namespace, read back the
1059 * translated pids, and put them into our to-return data
1060 */
1061
1062 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1063 perror("socketpair");
1064 free(tmpdata);
1065 return false;
1066 }
1067
1068 cpid = fork();
1069 if (cpid == -1)
1070 goto out;
1071
1072 if (!cpid) // child
1073 pid_to_ns_wrapper(sock[1], tpid);
1074
1075 char *ptr = tmpdata;
1076 cred.uid = 0;
1077 cred.gid = 0;
1078 while (sscanf(ptr, "%d\n", &qpid) == 1) {
1079 cred.pid = qpid;
1080 ret = send_creds(sock[0], &cred, v, true);
1081
1082 if (ret == SEND_CREDS_NOTSK)
1083 goto next;
1084 if (ret == SEND_CREDS_FAIL)
1085 goto out;
1086
1087 // read converted results
1088 FD_ZERO(&s);
1089 FD_SET(sock[0], &s);
1090 tv.tv_sec = 2;
1091 tv.tv_usec = 0;
1092 ret = select(sock[0]+1, &s, NULL, NULL, &tv);
1093 if (ret <= 0) {
1094 fprintf(stderr, "%s: select error waiting for pid from child: %s\n",
1095 __func__, strerror(errno));
1096 goto out;
1097 }
1098 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
1099 fprintf(stderr, "%s: error reading pid from child: %s\n",
1100 __func__, strerror(errno));
1101 goto out;
1102 }
1103 must_strcat_pid(d, &sz, &asz, qpid);
1104 next:
1105 ptr = strchr(ptr, '\n');
1106 if (!ptr)
1107 break;
1108 ptr++;
1109 }
1110
1111 cred.pid = getpid();
1112 v = '1';
1113 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
1114 // failed to ask child to exit
1115 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
1116 __func__, strerror(errno));
1117 goto out;
1118 }
1119
1120 answer = true;
1121
1122 out:
1123 free(tmpdata);
1124 if (cpid != -1)
1125 wait_for_pid(cpid);
1126 if (sock[0] != -1) {
1127 close(sock[0]);
1128 close(sock[1]);
1129 }
1130 return answer;
1131 }
1132
1133 static int cg_read(const char *path, char *buf, size_t size, off_t offset,
1134 struct fuse_file_info *fi)
1135 {
1136 struct fuse_context *fc = fuse_get_context();
1137 struct file_info *f = (struct file_info *)fi->fh;
1138 struct cgfs_files *k = NULL;
1139 char *data = NULL;
1140 int ret, s;
1141 bool r;
1142
1143 if (f->type != LXC_TYPE_CGFILE) {
1144 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
1145 return -EIO;
1146 }
1147
1148 if (offset)
1149 return 0;
1150
1151 if (!fc)
1152 return -EIO;
1153
1154 if (!f->controller)
1155 return -EINVAL;
1156
1157 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
1158 return -EINVAL;
1159 }
1160 free_key(k);
1161
1162
1163 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { // should never get here
1164 ret = -EACCES;
1165 goto out;
1166 }
1167
1168 if (strcmp(f->file, "tasks") == 0 ||
1169 strcmp(f->file, "/tasks") == 0 ||
1170 strcmp(f->file, "/cgroup.procs") == 0 ||
1171 strcmp(f->file, "cgroup.procs") == 0)
1172 // special case - we have to translate the pids
1173 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
1174 else
1175 r = cgfs_get_value(f->controller, f->cgroup, f->file, &data);
1176
1177 if (!r) {
1178 ret = -EINVAL;
1179 goto out;
1180 }
1181
1182 if (!data) {
1183 ret = 0;
1184 goto out;
1185 }
1186 s = strlen(data);
1187 if (s > size)
1188 s = size;
1189 memcpy(buf, data, s);
1190 if (s > 0 && s < size && data[s-1] != '\n')
1191 buf[s++] = '\n';
1192
1193 ret = s;
1194
1195 out:
1196 free(data);
1197 return ret;
1198 }
1199
1200 static void pid_from_ns(int sock, pid_t tpid)
1201 {
1202 pid_t vpid;
1203 struct ucred cred;
1204 char v;
1205 struct timeval tv;
1206 fd_set s;
1207 int ret;
1208
1209 cred.uid = 0;
1210 cred.gid = 0;
1211 while (1) {
1212 FD_ZERO(&s);
1213 FD_SET(sock, &s);
1214 tv.tv_sec = 2;
1215 tv.tv_usec = 0;
1216 ret = select(sock+1, &s, NULL, NULL, &tv);
1217 if (ret <= 0) {
1218 fprintf(stderr, "%s: bad select before read from parent: %s\n",
1219 __func__, strerror(errno));
1220 _exit(1);
1221 }
1222 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1223 fprintf(stderr, "%s: bad read from parent: %s\n",
1224 __func__, strerror(errno));
1225 _exit(1);
1226 }
1227 if (vpid == -1) // done
1228 break;
1229 v = '0';
1230 cred.pid = vpid;
1231 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
1232 v = '1';
1233 cred.pid = getpid();
1234 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
1235 _exit(1);
1236 }
1237 }
1238 _exit(0);
1239 }
1240
/*
 * Enter the pid namespace of tpid and fork a child that runs
 * pid_from_ns() on sock.  The fork is needed because only children
 * created after setns() actually live in the target pid namespace.
 *
 * A child that does not acknowledge its start over a pipe within one
 * second is killed and replaced.  Never returns: exits 0 when the child
 * exited cleanly and 1 on any failure.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	fd_set s;
	struct timeval tv;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

	for (;;) {
		cpid = fork();
		if (cpid < 0)
			_exit(1);

		if (!cpid) {
			char b = '1';
			close(cpipe[0]);
			if (write(cpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(cpipe[1]);
			pid_from_ns(sock, tpid);	/* does not return */
		}

		/* give the child 1 second to be done forking and write its ack */
		FD_ZERO(&s);
		FD_SET(cpipe[0], &s);
		tv.tv_sec = 1;
		tv.tv_usec = 0;
		ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
		if (ret > 0) {
			ret = read(cpipe[0], &v, 1);
			if (ret == 1 && v == '1')
				break;
		}

		/* no (valid) ack: get rid of this child and try again */
		kill(cpid, SIGKILL);
		wait_for_pid(cpid);
	}

	/* wait_for_pid() returns 0 on success, so non-zero is the failure case */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);
}
1303
/*
 * Write the pids listed in buf (one per line, as seen in tpid's pid
 * namespace) to the pids file of cgroup cg.
 *
 * The file is opened with open_pids_file().  A forked helper
 * (pid_from_ns_wrapper) enters tpid's pid namespace; each pid is written
 * to it over a socketpair and comes back as SCM_CREDENTIALS, i.e. as seen
 * from our side.  Pids that come back with payload '0' are printed to the
 * pids file.  Finally -1 is sent to make the helper exit, and it is
 * reaped.
 *
 * Returns true when the loop completed and neither an fprintf() nor the
 * final fclose() of the pids file failed.
 */
static bool do_write_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, const char *buf)
{
	int sock[2] = {-1, -1};
	pid_t qpid, cpid = -1;
	FILE *pids_file;
	const char *ptr;
	bool answer = false, fail = false;

	pids_file = open_pids_file(contrl, cg);
	if (!pids_file)
		return false;

	/*
	 * write the pids to a socket, have helper in writer's pidns
	 * call movepid for us
	 */
	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		perror("socketpair");
		goto out;
	}

	cpid = fork();
	if (cpid == -1)
		goto out;

	if (cpid == 0) { /* child */
		fclose(pids_file);
		pid_from_ns_wrapper(sock[1], tpid);
	}

	for (ptr = buf; sscanf(ptr, "%d", &qpid) == 1; ptr++) {
		struct ucred cred;
		char v;

		if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
			fprintf(stderr, "%s: error writing pid to child: %s\n",
				__func__, strerror(errno));
			goto out;
		}

		if (recv_creds(sock[0], &cred, &v) && v == '0' &&
		    fprintf(pids_file, "%d", (int) cred.pid) < 0)
			fail = true;

		/* advance to the line after the next newline, if any */
		ptr = strchr(ptr, '\n');
		if (!ptr)
			break;
	}

	/* All good, write the value */
	qpid = -1;
	if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
		fprintf(stderr, "Warning: failed to ask child to exit\n");

	answer = !fail;

out:
	if (cpid != -1)
		wait_for_pid(cpid);
	if (sock[0] != -1) {
		close(sock[0]);
		close(sock[1]);
	}
	/* pids_file is always open here; a failing close counts as failure */
	if (fclose(pids_file) != 0)
		answer = false;
	return answer;
}
1378
1379 int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1380 struct fuse_file_info *fi)
1381 {
1382 struct fuse_context *fc = fuse_get_context();
1383 char *localbuf = NULL;
1384 struct cgfs_files *k = NULL;
1385 struct file_info *f = (struct file_info *)fi->fh;
1386 bool r;
1387
1388 if (f->type != LXC_TYPE_CGFILE) {
1389 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
1390 return -EIO;
1391 }
1392
1393 if (offset)
1394 return 0;
1395
1396 if (!fc)
1397 return -EIO;
1398
1399 localbuf = alloca(size+1);
1400 localbuf[size] = '\0';
1401 memcpy(localbuf, buf, size);
1402
1403 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
1404 size = -EINVAL;
1405 goto out;
1406 }
1407
1408 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) {
1409 size = -EACCES;
1410 goto out;
1411 }
1412
1413 if (strcmp(f->file, "tasks") == 0 ||
1414 strcmp(f->file, "/tasks") == 0 ||
1415 strcmp(f->file, "/cgroup.procs") == 0 ||
1416 strcmp(f->file, "cgroup.procs") == 0)
1417 // special case - we have to translate the pids
1418 r = do_write_pids(fc->pid, f->controller, f->cgroup, f->file, localbuf);
1419 else
1420 r = cgfs_set_value(f->controller, f->cgroup, f->file, localbuf);
1421
1422 if (!r)
1423 size = -EINVAL;
1424
1425 out:
1426 free_key(k);
1427 return size;
1428 }
1429
1430 int cg_chown(const char *path, uid_t uid, gid_t gid)
1431 {
1432 struct fuse_context *fc = fuse_get_context();
1433 char *cgdir = NULL, *fpath = NULL, *path1, *path2, *controller;
1434 struct cgfs_files *k = NULL;
1435 const char *cgroup;
1436 int ret;
1437
1438 if (!fc)
1439 return -EIO;
1440
1441 if (strcmp(path, "/cgroup") == 0)
1442 return -EINVAL;
1443
1444 controller = pick_controller_from_path(fc, path);
1445 if (!controller)
1446 return -EINVAL;
1447 cgroup = find_cgroup_in_path(path);
1448 if (!cgroup)
1449 /* this is just /cgroup/controller */
1450 return -EINVAL;
1451
1452 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1453
1454 if (!fpath) {
1455 path1 = "/";
1456 path2 = cgdir;
1457 } else {
1458 path1 = cgdir;
1459 path2 = fpath;
1460 }
1461
1462 if (is_child_cgroup(controller, path1, path2)) {
1463 // get uid, gid, from '/tasks' file and make up a mode
1464 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1465 k = cgfs_get_key(controller, cgroup, "tasks");
1466
1467 } else
1468 k = cgfs_get_key(controller, path1, path2);
1469
1470 if (!k) {
1471 ret = -EINVAL;
1472 goto out;
1473 }
1474
1475 /*
1476 * This being a fuse request, the uid and gid must be valid
1477 * in the caller's namespace. So we can just check to make
1478 * sure that the caller is root in his uid, and privileged
1479 * over the file's current owner.
1480 */
1481 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) {
1482 ret = -EACCES;
1483 goto out;
1484 }
1485
1486 if (!cgfs_chown_file(controller, cgroup, uid, gid)) {
1487 ret = -EINVAL;
1488 goto out;
1489 }
1490
1491 ret = 0;
1492
1493 out:
1494 free_key(k);
1495 free(cgdir);
1496
1497 return ret;
1498 }
1499
1500 int cg_chmod(const char *path, mode_t mode)
1501 {
1502 struct fuse_context *fc = fuse_get_context();
1503 char * cgdir = NULL, *fpath = NULL, *path1, *path2, *controller;
1504 struct cgfs_files *k = NULL;
1505 const char *cgroup;
1506 int ret;
1507
1508 if (!fc)
1509 return -EIO;
1510
1511 if (strcmp(path, "/cgroup") == 0)
1512 return -EINVAL;
1513
1514 controller = pick_controller_from_path(fc, path);
1515 if (!controller)
1516 return -EINVAL;
1517 cgroup = find_cgroup_in_path(path);
1518 if (!cgroup)
1519 /* this is just /cgroup/controller */
1520 return -EINVAL;
1521
1522 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1523
1524 if (!fpath) {
1525 path1 = "/";
1526 path2 = cgdir;
1527 } else {
1528 path1 = cgdir;
1529 path2 = fpath;
1530 }
1531
1532 if (is_child_cgroup(controller, path1, path2)) {
1533 // get uid, gid, from '/tasks' file and make up a mode
1534 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1535 k = cgfs_get_key(controller, cgroup, "tasks");
1536
1537 } else
1538 k = cgfs_get_key(controller, path1, path2);
1539
1540 if (!k) {
1541 ret = -EINVAL;
1542 goto out;
1543 }
1544
1545 /*
1546 * This being a fuse request, the uid and gid must be valid
1547 * in the caller's namespace. So we can just check to make
1548 * sure that the caller is root in his uid, and privileged
1549 * over the file's current owner.
1550 */
1551 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
1552 ret = -EPERM;
1553 goto out;
1554 }
1555
1556 if (!cgfs_chmod_file(controller, cgroup, mode)) {
1557 ret = -EINVAL;
1558 goto out;
1559 }
1560
1561 ret = 0;
1562 out:
1563 free_key(k);
1564 free(cgdir);
1565 return ret;
1566 }
1567
1568 int cg_mkdir(const char *path, mode_t mode)
1569 {
1570 struct fuse_context *fc = fuse_get_context();
1571 char *fpath = NULL, *path1, *cgdir = NULL, *controller;
1572 const char *cgroup;
1573 int ret;
1574
1575 if (!fc)
1576 return -EIO;
1577
1578
1579 controller = pick_controller_from_path(fc, path);
1580 if (!controller)
1581 return -EINVAL;
1582
1583 cgroup = find_cgroup_in_path(path);
1584 if (!cgroup)
1585 return -EINVAL;
1586
1587 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1588 if (!fpath)
1589 path1 = "/";
1590 else
1591 path1 = cgdir;
1592
1593 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
1594 ret = -EACCES;
1595 goto out;
1596 }
1597 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
1598 ret = -EACCES;
1599 goto out;
1600 }
1601
1602 if (fc->uid == 0 && fc->gid == 0) {
1603 if (!cgfs_create(controller, cgroup)) {
1604 ret = -EINVAL;
1605 goto out;
1606 }
1607 } else {
1608 if (setresuid(fc->uid, fc->gid, 0) < 0) { // bail
1609 fprintf(stderr, "ERROR - DANGER - setresuid failed!\n");
1610 exit(1);
1611 }
1612
1613 bool bret = cgfs_create(controller, cgroup);
1614
1615 if (setresuid(0, 0, 0) < 0) {
1616 fprintf(stderr, "ERROR - failed to restore uids!\n");
1617 exit(1);
1618 }
1619 if (!bret) {
1620 ret = -EINVAL;
1621 goto out;
1622 }
1623 }
1624
1625 ret = 0;
1626
1627 out:
1628 free(cgdir);
1629 return ret;
1630 }
1631
1632 static int cg_rmdir(const char *path)
1633 {
1634 struct fuse_context *fc = fuse_get_context();
1635 char *fpath = NULL, *cgdir = NULL, *controller;
1636 const char *cgroup;
1637 int ret;
1638
1639 if (!fc)
1640 return -EIO;
1641
1642 controller = pick_controller_from_path(fc, path);
1643 if (!controller)
1644 return -EINVAL;
1645
1646 cgroup = find_cgroup_in_path(path);
1647 if (!cgroup)
1648 return -EINVAL;
1649
1650 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1651 if (!fpath) {
1652 ret = -EINVAL;
1653 goto out;
1654 }
1655
1656 fprintf(stderr, "rmdir: verifying access to %s:%s (req path %s)\n",
1657 controller, cgdir, path);
1658 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
1659 ret = -EACCES;
1660 goto out;
1661 }
1662 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
1663 ret = -EACCES;
1664 goto out;
1665 }
1666
1667 if (!cgfs_remove(controller, cgroup)) {
1668 ret = -EINVAL;
1669 goto out;
1670 }
1671
1672 ret = 0;
1673
1674 out:
1675 free(cgdir);
1676 return ret;
1677 }
1678
/* Return true when @line begins with the prefix @pref. */
static bool startswith(const char *line, const char *pref)
{
	return strncmp(line, pref, strlen(pref)) == 0;
}
1685
/*
 * Scan the line-oriented text in @memstat for the first line starting with
 * "total_cache" and store its value divided by 1024 in *@v.
 * *@v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	char *cur;

	*v = 0;
	for (cur = memstat; *cur != '\0'; cur++) {
		if (startswith(cur, "total_cache")) {
			/* 11 == strlen("total_cache") */
			sscanf(cur + 11, "%lu", v);
			*v /= 1024;
			return;
		}

		/* move to the newline; the loop increment steps past it */
		cur = strchr(cur, '\n');
		if (cur == NULL)
			return;
	}
}
1703
/*
 * Look up the line "<major>:<minor> <iotype> <value>" in the line-oriented
 * text @str and store <value> in *@v. *@v is 0 when no line matches.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32] = {0};
	size_t keylen;
	char *cur;

	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;
	for (cur = str; *cur != '\0'; cur++) {
		if (startswith(cur, key)) {
			sscanf(cur + keylen, "%lu", v);
			return;
		}

		/* move to the newline; the loop increment steps past it */
		cur = strchr(cur, '\n');
		if (cur == NULL)
			return;
	}
}
1726
1727 static int read_file(const char *path, char *buf, size_t size,
1728 struct file_info *d)
1729 {
1730 size_t linelen = 0, total_len = 0, rv = 0;
1731 char *line = NULL;
1732 char *cache = d->buf;
1733 size_t cache_size = d->buflen;
1734 FILE *f = fopen(path, "r");
1735 if (!f)
1736 return 0;
1737
1738 while (getline(&line, &linelen, f) != -1) {
1739 size_t l = snprintf(cache, cache_size, "%s", line);
1740 if (l < 0) {
1741 perror("Error writing to cache");
1742 rv = 0;
1743 goto err;
1744 }
1745 if (l >= cache_size) {
1746 fprintf(stderr, "Internal error: truncated write to cache\n");
1747 rv = 0;
1748 goto err;
1749 }
1750 if (l < cache_size) {
1751 cache += l;
1752 cache_size -= l;
1753 total_len += l;
1754 } else {
1755 cache += cache_size;
1756 total_len += cache_size;
1757 cache_size = 0;
1758 break;
1759 }
1760 }
1761
1762 d->size = total_len;
1763 if (total_len > size ) total_len = size;
1764
1765 /* read from off 0 */
1766 memcpy(buf, d->buf, total_len);
1767 rv = total_len;
1768 err:
1769 fclose(f);
1770 free(line);
1771 return rv;
1772 }
1773
1774 /*
1775 * FUSE ops for /proc
1776 */
1777
/*
 * Return memory.limit_in_bytes of @cgroup, or (unsigned long)-1 when the
 * value cannot be read.
 */
static unsigned long get_memlimit(const char *cgroup)
{
	char *raw = NULL;
	unsigned long limit;
	bool found;

	found = cgfs_get_value("memory", cgroup, "memory.limit_in_bytes", &raw);
	limit = found ? strtoul(raw, NULL, 10) : (unsigned long)-1;
	free(raw);

	return limit;
}
1790
/*
 * Return the smallest memory.limit_in_bytes found on @cgroup or any of its
 * ancestors up to the root. Ancestors whose limit cannot be read are
 * ignored. The result is (unsigned long)-1 when no limit could be read.
 */
static unsigned long get_min_memlimit(const char *cgroup)
{
	char *copy = strdupa(cgroup);
	unsigned long memlimit = 0, retlimit;

	retlimit = get_memlimit(copy);

	/*
	 * Walk up one path component at a time. dirname() settles on "."
	 * for a path without a leading '/', so stop there as well instead
	 * of looping forever.
	 */
	while (strcmp(copy, "/") != 0 && strcmp(copy, ".") != 0) {
		copy = dirname(copy);
		memlimit = get_memlimit(copy);
		if (memlimit != (unsigned long)-1 && memlimit < retlimit)
			retlimit = memlimit;
	}

	return retlimit;
}
1807
1808 static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1809 struct fuse_file_info *fi)
1810 {
1811 struct fuse_context *fc = fuse_get_context();
1812 struct file_info *d = (struct file_info *)fi->fh;
1813 char *cg;
1814 char *memusage_str = NULL, *memstat_str = NULL,
1815 *memswlimit_str = NULL, *memswusage_str = NULL;
1816 unsigned long memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0,
1817 cached = 0, hosttotal = 0;
1818 char *line = NULL;
1819 size_t linelen = 0, total_len = 0, rv = 0;
1820 char *cache = d->buf;
1821 size_t cache_size = d->buflen;
1822 FILE *f = NULL;
1823
1824 if (offset){
1825 if (offset > d->size)
1826 return -EINVAL;
1827 if (!d->cached)
1828 return 0;
1829 int left = d->size - offset;
1830 total_len = left > size ? size: left;
1831 memcpy(buf, cache + offset, total_len);
1832 return total_len;
1833 }
1834
1835 cg = get_pid_cgroup(fc->pid, "memory");
1836 if (!cg)
1837 return read_file("/proc/meminfo", buf, size, d);
1838
1839 memlimit = get_min_memlimit(cg);
1840 if (!cgfs_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
1841 goto err;
1842 if (!cgfs_get_value("memory", cg, "memory.stat", &memstat_str))
1843 goto err;
1844
1845 // Following values are allowed to fail, because swapaccount might be turned
1846 // off for current kernel
1847 if(cgfs_get_value("memory", cg, "memory.memsw.limit_in_bytes", &memswlimit_str) &&
1848 cgfs_get_value("memory", cg, "memory.memsw.usage_in_bytes", &memswusage_str))
1849 {
1850 memswlimit = strtoul(memswlimit_str, NULL, 10);
1851 memswusage = strtoul(memswusage_str, NULL, 10);
1852 memswlimit /= 1024;
1853 memswusage /= 1024;
1854 }
1855
1856 memusage = strtoul(memusage_str, NULL, 10);
1857 memlimit /= 1024;
1858 memusage /= 1024;
1859 get_mem_cached(memstat_str, &cached);
1860
1861 f = fopen("/proc/meminfo", "r");
1862 if (!f)
1863 goto err;
1864
1865 while (getline(&line, &linelen, f) != -1) {
1866 size_t l;
1867 char *printme, lbuf[100];
1868
1869 memset(lbuf, 0, 100);
1870 if (startswith(line, "MemTotal:")) {
1871 sscanf(line+14, "%lu", &hosttotal);
1872 if (hosttotal < memlimit)
1873 memlimit = hosttotal;
1874 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
1875 printme = lbuf;
1876 } else if (startswith(line, "MemFree:")) {
1877 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
1878 printme = lbuf;
1879 } else if (startswith(line, "MemAvailable:")) {
1880 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
1881 printme = lbuf;
1882 } else if (startswith(line, "SwapTotal:") && memswlimit > 0) {
1883 snprintf(lbuf, 100, "SwapTotal: %8lu kB\n", memswlimit - memlimit);
1884 printme = lbuf;
1885 } else if (startswith(line, "SwapFree:") && memswlimit > 0 && memswusage > 0) {
1886 snprintf(lbuf, 100, "SwapFree: %8lu kB\n",
1887 (memswlimit - memlimit) - (memswusage - memusage));
1888 printme = lbuf;
1889 } else if (startswith(line, "Buffers:")) {
1890 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
1891 printme = lbuf;
1892 } else if (startswith(line, "Cached:")) {
1893 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
1894 printme = lbuf;
1895 } else if (startswith(line, "SwapCached:")) {
1896 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
1897 printme = lbuf;
1898 } else
1899 printme = line;
1900
1901 l = snprintf(cache, cache_size, "%s", printme);
1902 if (l < 0) {
1903 perror("Error writing to cache");
1904 rv = 0;
1905 goto err;
1906
1907 }
1908 if (l >= cache_size) {
1909 fprintf(stderr, "Internal error: truncated write to cache\n");
1910 rv = 0;
1911 goto err;
1912 }
1913
1914 cache += l;
1915 cache_size -= l;
1916 total_len += l;
1917 }
1918
1919 d->cached = 1;
1920 d->size = total_len;
1921 if (total_len > size ) total_len = size;
1922 memcpy(buf, d->buf, total_len);
1923
1924 rv = total_len;
1925 err:
1926 if (f)
1927 fclose(f);
1928 free(line);
1929 free(cg);
1930 free(memusage_str);
1931 free(memswlimit_str);
1932 free(memswusage_str);
1933 free(memstat_str);
1934 return rv;
1935 }
1936
/*
 * Fetch cpuset.cpus for the cgroup @cg.
 * The result is a newly allocated string the caller must free, or NULL
 * when the value could not be read.
 */
static char *get_cpuset(const char *cg)
{
	char *answer;

	return cgfs_get_value("cpuset", cg, "cpuset.cpus", &answer) ? answer : NULL;
}
1949
bool cpu_in_cpuset(int cpu, const char *cpuset);

/*
 * Return true when @line is a "processor : N" line whose cpu number N is
 * accepted by cpu_in_cpuset() for @cpuset.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1 &&
	       cpu_in_cpuset(cpu, cpuset);
}
1960
/*
 * Tell whether @line is a "processor : N" line of /proc/cpuinfo,
 * i.e. whether a cpu number can be parsed out of it.
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1;
}
1972
1973 static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
1974 struct fuse_file_info *fi)
1975 {
1976 struct fuse_context *fc = fuse_get_context();
1977 struct file_info *d = (struct file_info *)fi->fh;
1978 char *cg;
1979 char *cpuset = NULL;
1980 char *line = NULL;
1981 size_t linelen = 0, total_len = 0, rv = 0;
1982 bool am_printing = false;
1983 int curcpu = -1;
1984 char *cache = d->buf;
1985 size_t cache_size = d->buflen;
1986 FILE *f = NULL;
1987
1988 if (offset){
1989 if (offset > d->size)
1990 return -EINVAL;
1991 if (!d->cached)
1992 return 0;
1993 int left = d->size - offset;
1994 total_len = left > size ? size: left;
1995 memcpy(buf, cache + offset, total_len);
1996 return total_len;
1997 }
1998
1999 cg = get_pid_cgroup(fc->pid, "cpuset");
2000 if (!cg)
2001 return read_file("proc/cpuinfo", buf, size, d);
2002
2003 cpuset = get_cpuset(cg);
2004 if (!cpuset)
2005 goto err;
2006
2007 f = fopen("/proc/cpuinfo", "r");
2008 if (!f)
2009 goto err;
2010
2011 while (getline(&line, &linelen, f) != -1) {
2012 size_t l;
2013 if (is_processor_line(line)) {
2014 am_printing = cpuline_in_cpuset(line, cpuset);
2015 if (am_printing) {
2016 curcpu ++;
2017 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
2018 if (l < 0) {
2019 perror("Error writing to cache");
2020 rv = 0;
2021 goto err;
2022 }
2023 if (l >= cache_size) {
2024 fprintf(stderr, "Internal error: truncated write to cache\n");
2025 rv = 0;
2026 goto err;
2027 }
2028 if (l < cache_size){
2029 cache += l;
2030 cache_size -= l;
2031 total_len += l;
2032 }else{
2033 cache += cache_size;
2034 total_len += cache_size;
2035 cache_size = 0;
2036 break;
2037 }
2038 }
2039 continue;
2040 }
2041 if (am_printing) {
2042 l = snprintf(cache, cache_size, "%s", line);
2043 if (l < 0) {
2044 perror("Error writing to cache");
2045 rv = 0;
2046 goto err;
2047 }
2048 if (l >= cache_size) {
2049 fprintf(stderr, "Internal error: truncated write to cache\n");
2050 rv = 0;
2051 goto err;
2052 }
2053 if (l < cache_size) {
2054 cache += l;
2055 cache_size -= l;
2056 total_len += l;
2057 } else {
2058 cache += cache_size;
2059 total_len += cache_size;
2060 cache_size = 0;
2061 break;
2062 }
2063 }
2064 }
2065
2066 d->cached = 1;
2067 d->size = total_len;
2068 if (total_len > size ) total_len = size;
2069
2070 /* read from off 0 */
2071 memcpy(buf, d->buf, total_len);
2072 rv = total_len;
2073 err:
2074 if (f)
2075 fclose(f);
2076 free(line);
2077 free(cpuset);
2078 free(cg);
2079 return rv;
2080 }
2081
2082 static int proc_stat_read(char *buf, size_t size, off_t offset,
2083 struct fuse_file_info *fi)
2084 {
2085 struct fuse_context *fc = fuse_get_context();
2086 struct file_info *d = (struct file_info *)fi->fh;
2087 char *cg;
2088 char *cpuset = NULL;
2089 char *line = NULL;
2090 size_t linelen = 0, total_len = 0, rv = 0;
2091 int curcpu = -1; /* cpu numbering starts at 0 */
2092 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
2093 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
2094 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
2095 #define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2096 char cpuall[CPUALL_MAX_SIZE];
2097 /* reserve for cpu all */
2098 char *cache = d->buf + CPUALL_MAX_SIZE;
2099 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
2100 FILE *f = NULL;
2101
2102 if (offset){
2103 if (offset > d->size)
2104 return -EINVAL;
2105 if (!d->cached)
2106 return 0;
2107 int left = d->size - offset;
2108 total_len = left > size ? size: left;
2109 memcpy(buf, d->buf + offset, total_len);
2110 return total_len;
2111 }
2112
2113 cg = get_pid_cgroup(fc->pid, "cpuset");
2114 if (!cg)
2115 return read_file("/proc/stat", buf, size, d);
2116
2117 cpuset = get_cpuset(cg);
2118 if (!cpuset)
2119 goto err;
2120
2121 f = fopen("/proc/stat", "r");
2122 if (!f)
2123 goto err;
2124
2125 //skip first line
2126 if (getline(&line, &linelen, f) < 0) {
2127 fprintf(stderr, "proc_stat_read read first line failed\n");
2128 goto err;
2129 }
2130
2131 while (getline(&line, &linelen, f) != -1) {
2132 size_t l;
2133 int cpu;
2134 char cpu_char[10]; /* That's a lot of cores */
2135 char *c;
2136
2137 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
2138 /* not a ^cpuN line containing a number N, just print it */
2139 l = snprintf(cache, cache_size, "%s", line);
2140 if (l < 0) {
2141 perror("Error writing to cache");
2142 rv = 0;
2143 goto err;
2144 }
2145 if (l >= cache_size) {
2146 fprintf(stderr, "Internal error: truncated write to cache\n");
2147 rv = 0;
2148 goto err;
2149 }
2150 if (l < cache_size) {
2151 cache += l;
2152 cache_size -= l;
2153 total_len += l;
2154 continue;
2155 } else {
2156 //no more space, break it
2157 cache += cache_size;
2158 total_len += cache_size;
2159 cache_size = 0;
2160 break;
2161 }
2162 }
2163
2164 if (sscanf(cpu_char, "%d", &cpu) != 1)
2165 continue;
2166 if (!cpu_in_cpuset(cpu, cpuset))
2167 continue;
2168 curcpu ++;
2169
2170 c = strchr(line, ' ');
2171 if (!c)
2172 continue;
2173 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
2174 if (l < 0) {
2175 perror("Error writing to cache");
2176 rv = 0;
2177 goto err;
2178
2179 }
2180 if (l >= cache_size) {
2181 fprintf(stderr, "Internal error: truncated write to cache\n");
2182 rv = 0;
2183 goto err;
2184 }
2185
2186 cache += l;
2187 cache_size -= l;
2188 total_len += l;
2189
2190 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
2191 &softirq, &steal, &guest) != 9)
2192 continue;
2193 user_sum += user;
2194 nice_sum += nice;
2195 system_sum += system;
2196 idle_sum += idle;
2197 iowait_sum += iowait;
2198 irq_sum += irq;
2199 softirq_sum += softirq;
2200 steal_sum += steal;
2201 guest_sum += guest;
2202 }
2203
2204 cache = d->buf;
2205
2206 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
2207 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
2208 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
2209 memcpy(cache, cpuall, cpuall_len);
2210 cache += cpuall_len;
2211 } else{
2212 /* shouldn't happen */
2213 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
2214 cpuall_len = 0;
2215 }
2216
2217 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
2218 total_len += cpuall_len;
2219 d->cached = 1;
2220 d->size = total_len;
2221 if (total_len > size ) total_len = size;
2222
2223 memcpy(buf, d->buf, total_len);
2224 rv = total_len;
2225
2226 err:
2227 if (f)
2228 fclose(f);
2229 free(line);
2230 free(cpuset);
2231 free(cg);
2232 return rv;
2233 }
2234
2235 /*
2236 * How to guess what to present for uptime?
2237 * One thing we could do would be to take the date on the caller's
2238 * memory.usage_in_bytes file, which should equal the time of creation
2239 * of his cgroup. However, a task could be in a sub-cgroup of the
2240 * container. The same problem exists if we try to look at the ages
2241 * of processes in the caller's cgroup.
2242 *
2243 * So we'll fork a task that will enter the caller's pidns, mount a
2244 * fresh procfs, get the age of /proc/1, and pass that back over a pipe.
2245 *
2246 * For the second uptime #, we'll do as Stéphane had done, just copy
2247 * the number from /proc/uptime. Not sure how to best emulate 'idle'
2248 * time. Maybe someone can come up with a good algorithm and submit a
2249 * patch. Maybe something based on cpushare info?
2250 */
2251
2252 /* return age of the reaper for $pid, taken from ctime of its procdir */
2253 static long int get_pid1_time(pid_t pid)
2254 {
2255 char fnam[100];
2256 int fd, cpipe[2], ret;
2257 struct stat sb;
2258 pid_t cpid;
2259 struct timeval tv;
2260 fd_set s;
2261 char v;
2262
2263 if (unshare(CLONE_NEWNS))
2264 return 0;
2265
2266 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
2267 perror("rslave mount failed");
2268 return 0;
2269 }
2270
2271 ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", pid);
2272 if (ret < 0 || ret >= sizeof(fnam))
2273 return 0;
2274
2275 fd = open(fnam, O_RDONLY);
2276 if (fd < 0) {
2277 perror("get_pid1_time open of ns/pid");
2278 return 0;
2279 }
2280 if (setns(fd, 0)) {
2281 perror("get_pid1_time setns 1");
2282 close(fd);
2283 return 0;
2284 }
2285 close(fd);
2286
2287 if (pipe(cpipe) < 0)
2288 exit(1);
2289
2290 loop:
2291 cpid = fork();
2292 if (cpid < 0)
2293 return 0;
2294
2295 if (!cpid) {
2296 char b = '1';
2297 close(cpipe[0]);
2298 if (write(cpipe[1], &b, sizeof(char)) < 0) {
2299 fprintf(stderr, "%s (child): erorr on write: %s\n",
2300 __func__, strerror(errno));
2301 }
2302 close(cpipe[1]);
2303 umount2("/proc", MNT_DETACH);
2304 if (mount("proc", "/proc", "proc", 0, NULL)) {
2305 perror("get_pid1_time mount");
2306 return 0;
2307 }
2308 ret = lstat("/proc/1", &sb);
2309 if (ret) {
2310 perror("get_pid1_time lstat");
2311 return 0;
2312 }
2313 return time(NULL) - sb.st_ctime;
2314 }
2315
2316 // give the child 1 second to be done forking and
2317 // write it's ack
2318 FD_ZERO(&s);
2319 FD_SET(cpipe[0], &s);
2320 tv.tv_sec = 1;
2321 tv.tv_usec = 0;
2322 ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
2323 if (ret <= 0)
2324 goto again;
2325 ret = read(cpipe[0], &v, 1);
2326 if (ret != sizeof(char) || v != '1') {
2327 goto again;
2328 }
2329
2330 wait_for_pid(cpid);
2331 _exit(0);
2332
2333 again:
2334 kill(cpid, SIGKILL);
2335 wait_for_pid(cpid);
2336 goto loop;
2337 }
2338
/*
 * Return the value computed by get_pid1_time() for @qpid.
 *
 * get_pid1_time() changes namespaces, so it runs in a forked child which
 * sends the result back over a pipe. The parent waits at most one second
 * for the answer.
 *
 * Returns 0 when the pipe or the child cannot be created, on timeout, or
 * when the answer cannot be read.
 */
static long int getreaperage(pid_t qpid)
{
	int pid, mypipe[2], ret;
	struct timeval tv;
	fd_set s;
	long int mtime, answer = 0;

	if (pipe(mypipe)) {
		return 0;
	}

	pid = fork();
	if (pid < 0) {
		/* nothing to wait for: release the pipe and give up */
		perror("fork");
		close(mypipe[0]);
		close(mypipe[1]);
		return 0;
	}

	if (!pid) { // child
		mtime = get_pid1_time(qpid);
		if (write(mypipe[1], &mtime, sizeof(mtime)) != sizeof(mtime))
			fprintf(stderr, "Warning: bad write from getreaperage\n");
		_exit(0);
	}

	close(mypipe[1]);
	FD_ZERO(&s);
	FD_SET(mypipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(mypipe[0]+1, &s, NULL, NULL, &tv);
	if (ret < 0) {
		perror("select");
		goto out;
	}
	if (!ret) {
		fprintf(stderr, "timed out\n");
		goto out;
	}
	if (read(mypipe[0], &mtime, sizeof(mtime)) != sizeof(mtime)) {
		perror("read");
		goto out;
	}
	answer = mtime;

out:
	wait_for_pid(pid);
	close(mypipe[0]);
	return answer;
}
2384
2385 /*
2386 * fork a task which switches to @task's namespace and writes '1'.
2387 * over a unix sock so we can read the task's reaper's pid in our
2388 * namespace
2389 */
2390 void write_task_init_pid_exit(int sock, pid_t target)
2391 {
2392 struct ucred cred;
2393 char fnam[100];
2394 pid_t pid;
2395 char v;
2396 int fd, ret;
2397
2398 ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", (int)target);
2399 if (ret < 0 || ret >= sizeof(fnam))
2400 exit(1);
2401
2402 fd = open(fnam, O_RDONLY);
2403 if (fd < 0) {
2404 perror("get_pid1_time open of ns/pid");
2405 exit(1);
2406 }
2407 if (setns(fd, 0)) {
2408 perror("get_pid1_time setns 1");
2409 close(fd);
2410 exit(1);
2411 }
2412 pid = fork();
2413 if (pid < 0)
2414 exit(1);
2415 if (pid != 0) {
2416 wait_for_pid(pid);
2417 exit(0);
2418 }
2419
2420 /* we are the child */
2421 cred.uid = 0;
2422 cred.gid = 0;
2423 cred.pid = 1;
2424 v = '1';
2425 send_creds(sock, &cred, v, true);
2426 exit(0);
2427 }
2428
2429 static pid_t get_task_reaper_pid(pid_t task)
2430 {
2431 int sock[2];
2432 pid_t pid;
2433 pid_t ret = -1;
2434 char v = '0';
2435 struct ucred cred;
2436
2437 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
2438 perror("socketpair");
2439 return -1;
2440 }
2441
2442 pid = fork();
2443 if (pid < 0)
2444 goto out;
2445 if (!pid) {
2446 close(sock[1]);
2447 write_task_init_pid_exit(sock[0], task);
2448 }
2449
2450 if (!recv_creds(sock[1], &cred, &v))
2451 goto out;
2452 ret = cred.pid;
2453
2454 out:
2455 close(sock[0]);
2456 close(sock[1]);
2457 return ret;
2458 }
2459
/*
 * Return the cpu time, in whole seconds, consumed by the cpuacct cgroup of
 * the reaper of @task's pid namespace.
 *
 * The reaper's cgroup is used rather than @task's own, because @task may
 * sit in a sub-cgroup of its container.
 *
 * Returns 0 when the reaper, its cgroup or the usage value cannot be
 * determined.
 */
static unsigned long get_reaper_busy(pid_t task)
{
	pid_t init = get_task_reaper_pid(task);
	char *cgroup = NULL, *usage_str = NULL;
	unsigned long usage = 0;

	if (init == -1)
		return 0;

	cgroup = get_pid_cgroup(init, "cpuacct");
	if (!cgroup)
		goto out;
	if (!cgfs_get_value("cpuacct", cgroup, "cpuacct.usage", &usage_str))
		goto out;

	/* cpuacct.usage is reported in nanoseconds */
	usage = strtoul(usage_str, NULL, 10);
	usage /= 1000000000;

out:
	free(cgroup);
	free(usage_str);
	return usage;
}
2482
2483 /*
2484 * We read /proc/uptime and reuse its second field.
2485 * For the first field, we use the mtime for the reaper for
2486 * the calling pid as returned by getreaperage
2487 */
2488 static int proc_uptime_read(char *buf, size_t size, off_t offset,
2489 struct fuse_file_info *fi)
2490 {
2491 struct fuse_context *fc = fuse_get_context();
2492 struct file_info *d = (struct file_info *)fi->fh;
2493 long int reaperage = getreaperage(fc->pid);;
2494 unsigned long int busytime = get_reaper_busy(fc->pid), idletime;
2495 char *cache = d->buf;
2496 size_t total_len = 0;
2497
2498 if (offset){
2499 if (offset > d->size)
2500 return -EINVAL;
2501 if (!d->cached)
2502 return 0;
2503 int left = d->size - offset;
2504 total_len = left > size ? size: left;
2505 memcpy(buf, cache + offset, total_len);
2506 return total_len;
2507 }
2508
2509 idletime = reaperage - busytime;
2510 if (idletime > reaperage)
2511 idletime = reaperage;
2512
2513 total_len = snprintf(d->buf, d->size, "%ld.0 %lu.0\n", reaperage, idletime);
2514 if (total_len < 0){
2515 perror("Error writing to cache");
2516 return 0;
2517 }
2518
2519 d->size = (int)total_len;
2520 d->cached = 1;
2521
2522 if (total_len > size) total_len = size;
2523
2524 memcpy(buf, d->buf, total_len);
2525 return total_len;
2526 }
2527
2528 static int proc_diskstats_read(char *buf, size_t size, off_t offset,
2529 struct fuse_file_info *fi)
2530 {
2531 char dev_name[72];
2532 struct fuse_context *fc = fuse_get_context();
2533 struct file_info *d = (struct file_info *)fi->fh;
2534 char *cg;
2535 char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
2536 *io_wait_time_str = NULL, *io_service_time_str = NULL;
2537 unsigned long read = 0, write = 0;
2538 unsigned long read_merged = 0, write_merged = 0;
2539 unsigned long read_sectors = 0, write_sectors = 0;
2540 unsigned long read_ticks = 0, write_ticks = 0;
2541 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
2542 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
2543 char *cache = d->buf;
2544 size_t cache_size = d->buflen;
2545 char *line = NULL;
2546 size_t linelen = 0, total_len = 0, rv = 0;
2547 unsigned int major = 0, minor = 0;
2548 int i = 0;
2549 FILE *f = NULL;
2550
2551 if (offset){
2552 if (offset > d->size)
2553 return -EINVAL;
2554 if (!d->cached)
2555 return 0;
2556 int left = d->size - offset;
2557 total_len = left > size ? size: left;
2558 memcpy(buf, cache + offset, total_len);
2559 return total_len;
2560 }
2561
2562 cg = get_pid_cgroup(fc->pid, "blkio");
2563 if (!cg)
2564 return read_file("/proc/diskstats", buf, size, d);
2565
2566 if (!cgfs_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
2567 goto err;
2568 if (!cgfs_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
2569 goto err;
2570 if (!cgfs_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
2571 goto err;
2572 if (!cgfs_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
2573 goto err;
2574 if (!cgfs_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
2575 goto err;
2576
2577
2578 f = fopen("/proc/diskstats", "r");
2579 if (!f)
2580 goto err;
2581
2582 while (getline(&line, &linelen, f) != -1) {
2583 size_t l;
2584 char *printme, lbuf[256];
2585
2586 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
2587 if(i == 3){
2588 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
2589 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
2590 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
2591 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
2592 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
2593 read_sectors = read_sectors/512;
2594 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
2595 write_sectors = write_sectors/512;
2596
2597 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
2598 rd_svctm = rd_svctm/1000000;
2599 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
2600 rd_wait = rd_wait/1000000;
2601 read_ticks = rd_svctm + rd_wait;
2602
2603 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
2604 wr_svctm = wr_svctm/1000000;
2605 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
2606 wr_wait = wr_wait/1000000;
2607 write_ticks = wr_svctm + wr_wait;
2608
2609 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
2610 tot_ticks = tot_ticks/1000000;
2611 }else{
2612 continue;
2613 }
2614
2615 memset(lbuf, 0, 256);
2616 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
2617 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
2618 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
2619 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
2620 printme = lbuf;
2621 } else
2622 continue;
2623
2624 l = snprintf(cache, cache_size, "%s", printme);
2625 if (l < 0) {
2626 perror("Error writing to fuse buf");
2627 rv = 0;
2628 goto err;
2629 }
2630 if (l >= cache_size) {
2631 fprintf(stderr, "Internal error: truncated write to cache\n");
2632 rv = 0;
2633 goto err;
2634 }
2635 cache += l;
2636 cache_size -= l;
2637 total_len += l;
2638 }
2639
2640 d->cached = 1;
2641 d->size = total_len;
2642 if (total_len > size ) total_len = size;
2643 memcpy(buf, d->buf, total_len);
2644
2645 rv = total_len;
2646 err:
2647 free(cg);
2648 if (f)
2649 fclose(f);
2650 free(line);
2651 free(io_serviced_str);
2652 free(io_merged_str);
2653 free(io_service_bytes_str);
2654 free(io_wait_time_str);
2655 free(io_service_time_str);
2656 return rv;
2657 }
2658
/*
 * Measure how many bytes the file named by @which currently holds.
 *
 * The file is read line by line and the lengths reported by getline()
 * are added up until getline() returns -1.
 *
 * Returns the accumulated byte count, or 0 if the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char *linebuf = NULL;
	size_t cap = 0;
	ssize_t total = 0;
	FILE *fp;

	fp = fopen(which, "r");
	if (fp == NULL)
		return 0;

	for (;;) {
		ssize_t got = getline(&linebuf, &cap, fp);

		if (got == -1)
			break;
		total += got;
	}

	/* getline() owns linebuf; it must be released even if nothing was read. */
	free(linebuf);
	fclose(fp);

	return total;
}
2675
/*
 * getattr handler for the /proc part of the filesystem.
 *
 * @path: path relative to the lxcfs mount ("/proc" or "/proc/<file>").
 * @sb:   stat buffer to fill; it is zeroed first.
 *
 * "/proc" is reported as a 0555 directory with two links.  Each of the
 * virtualized files is reported as a 0444 regular file with one link and
 * size 0.  Owner and group are 0 and all three timestamps are set to the
 * current CLOCK_REALTIME time.
 *
 * Returns 0 on success, -EINVAL if the clock cannot be read and -ENOENT
 * for any other path.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const proc_files[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	struct timespec ts;
	size_t idx;

	memset(sb, 0, sizeof(*sb));
	if (clock_gettime(CLOCK_REALTIME, &ts) < 0)
		return -EINVAL;

	sb->st_uid = 0;
	sb->st_gid = 0;
	sb->st_atim = ts;
	sb->st_mtim = ts;
	sb->st_ctim = ts;

	if (!strcmp(path, "/proc")) {
		sb->st_nlink = 2;
		sb->st_mode = S_IFDIR | 00555;
		return 0;
	}

	for (idx = 0; idx < sizeof(proc_files) / sizeof(proc_files[0]); idx++) {
		if (strcmp(path, proc_files[idx]) != 0)
			continue;

		sb->st_size = 0;
		sb->st_nlink = 1;
		sb->st_mode = S_IFREG | 00444;
		return 0;
	}

	return -ENOENT;
}
2703
2704 static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2705 struct fuse_file_info *fi)
2706 {
2707 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2708 filler(buf, "meminfo", NULL, 0) != 0 ||
2709 filler(buf, "stat", NULL, 0) != 0 ||
2710 filler(buf, "uptime", NULL, 0) != 0 ||
2711 filler(buf, "diskstats", NULL, 0) != 0)
2712 return -EINVAL;
2713 return 0;
2714 }
2715
2716 static int proc_open(const char *path, struct fuse_file_info *fi)
2717 {
2718 int type = -1;
2719 struct file_info *info;
2720
2721 if (strcmp(path, "/proc/meminfo") == 0)
2722 type = LXC_TYPE_PROC_MEMINFO;
2723 else if (strcmp(path, "/proc/cpuinfo") == 0)
2724 type = LXC_TYPE_PROC_CPUINFO;
2725 else if (strcmp(path, "/proc/uptime") == 0)
2726 type = LXC_TYPE_PROC_UPTIME;
2727 else if (strcmp(path, "/proc/stat") == 0)
2728 type = LXC_TYPE_PROC_STAT;
2729 else if (strcmp(path, "/proc/diskstats") == 0)
2730 type = LXC_TYPE_PROC_DISKSTATS;
2731 if (type == -1)
2732 return -ENOENT;
2733
2734 info = malloc(sizeof(*info));
2735 if (!info)
2736 return -ENOMEM;
2737
2738 memset(info, 0, sizeof(*info));
2739 info->type = type;
2740
2741 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
2742 do {
2743 info->buf = malloc(info->buflen);
2744 } while (!info->buf);
2745 memset(info->buf, 0, info->buflen);
2746 /* set actual size to buffer size */
2747 info->size = info->buflen;
2748
2749 fi->fh = (unsigned long)info;
2750 return 0;
2751 }
2752
2753 static int proc_release(const char *path, struct fuse_file_info *fi)
2754 {
2755 struct file_info *f = (struct file_info *)fi->fh;
2756
2757 do_release_file_info(f);
2758 return 0;
2759 }
2760
2761 static int proc_read(const char *path, char *buf, size_t size, off_t offset,
2762 struct fuse_file_info *fi)
2763 {
2764 struct file_info *f = (struct file_info *) fi->fh;
2765
2766 switch (f->type) {
2767 case LXC_TYPE_PROC_MEMINFO:
2768 return proc_meminfo_read(buf, size, offset, fi);
2769 case LXC_TYPE_PROC_CPUINFO:
2770 return proc_cpuinfo_read(buf, size, offset, fi);
2771 case LXC_TYPE_PROC_UPTIME:
2772 return proc_uptime_read(buf, size, offset, fi);
2773 case LXC_TYPE_PROC_STAT:
2774 return proc_stat_read(buf, size, offset, fi);
2775 case LXC_TYPE_PROC_DISKSTATS:
2776 return proc_diskstats_read(buf, size, offset, fi);
2777 default:
2778 return -EINVAL;
2779 }
2780 }
2781
2782 /*
2783 * FUSE ops for /
2784 * these just delegate to the /proc and /cgroup ops as
2785 * needed
2786 */
2787
2788 static int lxcfs_getattr(const char *path, struct stat *sb)
2789 {
2790 if (strcmp(path, "/") == 0) {
2791 sb->st_mode = S_IFDIR | 00755;
2792 sb->st_nlink = 2;
2793 return 0;
2794 }
2795 if (strncmp(path, "/cgroup", 7) == 0) {
2796 return cg_getattr(path, sb);
2797 }
2798 if (strncmp(path, "/proc", 5) == 0) {
2799 return proc_getattr(path, sb);
2800 }
2801 return -EINVAL;
2802 }
2803
/*
 * Top-level opendir handler.
 *
 * "/" and "/proc" need no per-open state and succeed immediately; paths
 * starting with "/cgroup" are delegated to cg_opendir().
 *
 * Returns 0 or the delegate's result, -ENOENT for anything else.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	if (!strcmp(path, "/") || !strcmp(path, "/proc"))
		return 0;

	if (!strncmp(path, "/cgroup", 7))
		return cg_opendir(path, fi);

	return -ENOENT;
}
2816
2817 static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2818 struct fuse_file_info *fi)
2819 {
2820 if (strcmp(path, "/") == 0) {
2821 if (filler(buf, "proc", NULL, 0) != 0 ||
2822 filler(buf, "cgroup", NULL, 0) != 0)
2823 return -EINVAL;
2824 return 0;
2825 }
2826 if (strncmp(path, "/cgroup", 7) == 0)
2827 return cg_readdir(path, buf, filler, offset, fi);
2828 if (strcmp(path, "/proc") == 0)
2829 return proc_readdir(path, buf, filler, offset, fi);
2830 return -EINVAL;
2831 }
2832
/*
 * Top-level releasedir handler.
 *
 * Nothing was set up for "/" or "/proc" in lxcfs_opendir(), so there is
 * nothing to undo for them; paths starting with "/cgroup" are delegated
 * to cg_releasedir().
 *
 * Returns 0 or the delegate's result, -EINVAL for anything else.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	if (!strcmp(path, "/") || !strcmp(path, "/proc"))
		return 0;

	if (!strncmp(path, "/cgroup", 7))
		return cg_releasedir(path, fi);

	return -EINVAL;
}
2844
/*
 * Top-level open handler: routes paths starting with "/cgroup" to
 * cg_open() and paths starting with "/proc" to proc_open().
 *
 * Returns the delegate's result, or -EINVAL for any other path.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	int rc = -EINVAL;

	if (!strncmp(path, "/cgroup", 7))
		rc = cg_open(path, fi);
	else if (!strncmp(path, "/proc", 5))
		rc = proc_open(path, fi);

	return rc;
}
2854
/*
 * Top-level read handler: routes paths starting with "/cgroup" to
 * cg_read() and paths starting with "/proc" to proc_read().
 *
 * Returns the delegate's result, or -EINVAL for any other path.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	int rc = -EINVAL;

	if (!strncmp(path, "/cgroup", 7))
		rc = cg_read(path, buf, size, offset, fi);
	else if (!strncmp(path, "/proc", 5))
		rc = proc_read(path, buf, size, offset, fi);

	return rc;
}
2865
/*
 * Top-level write handler.  Only paths starting with "/cgroup" accept
 * writes; they are delegated to cg_write().
 *
 * Returns the delegate's result, or -EINVAL for any other path.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
2875
/*
 * flush handler: intentionally a no-op.  Neither argument is used and
 * success (0) is always reported.
 */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
2880
/*
 * Top-level release handler: routes paths starting with "/cgroup" to
 * cg_release() and paths starting with "/proc" to proc_release().
 *
 * Returns the delegate's result, or -EINVAL for any other path.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	int rc = -EINVAL;

	if (!strncmp(path, "/cgroup", 7))
		rc = cg_release(path, fi);
	else if (!strncmp(path, "/proc", 5))
		rc = proc_release(path, fi);

	return rc;
}
2890
/*
 * fsync handler: intentionally a no-op.  None of the arguments is used
 * and success (0) is always reported.
 */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
2895
/*
 * mkdir handler.  Directories can only be created below "/cgroup", where
 * the request is delegated to cg_mkdir().
 *
 * Returns the delegate's result, or -EINVAL for any other path.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
2903
/*
 * chown handler.  Ownership can only be changed below "/cgroup", where
 * the request is delegated to cg_chown().
 *
 * Returns the delegate's result, or -EINVAL for any other path.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
2911
/*
 * truncate handler.
 *
 * cat issues a truncate before it calls ops->write.  Truncating makes no
 * real sense for cgroup files, so for paths starting with "/cgroup" the
 * request is accepted (0) without doing anything; @newsize is ignored.
 * Every other path is refused with -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	(void)newsize;

	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
2923
/*
 * rmdir handler.  Directories can only be removed below "/cgroup", where
 * the request is delegated to cg_rmdir().
 *
 * Returns the delegate's result, or -EINVAL for any other path.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
2930
/*
 * chmod handler.  Modes can only be changed below "/cgroup", where the
 * request is delegated to cg_chmod().
 *
 * Returns the delegate's result, or -EINVAL for any other path.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
2937
2938 const struct fuse_operations lxcfs_ops = {
2939 .getattr = lxcfs_getattr,
2940 .readlink = NULL,
2941 .getdir = NULL,
2942 .mknod = NULL,
2943 .mkdir = lxcfs_mkdir,
2944 .unlink = NULL,
2945 .rmdir = lxcfs_rmdir,
2946 .symlink = NULL,
2947 .rename = NULL,
2948 .link = NULL,
2949 .chmod = lxcfs_chmod,
2950 .chown = lxcfs_chown,
2951 .truncate = lxcfs_truncate,
2952 .utime = NULL,
2953
2954 .open = lxcfs_open,
2955 .read = lxcfs_read,
2956 .release = lxcfs_release,
2957 .write = lxcfs_write,
2958
2959 .statfs = NULL,
2960 .flush = lxcfs_flush,
2961 .fsync = lxcfs_fsync,
2962
2963 .setxattr = NULL,
2964 .getxattr = NULL,
2965 .listxattr = NULL,
2966 .removexattr = NULL,
2967
2968 .opendir = lxcfs_opendir,
2969 .readdir = lxcfs_readdir,
2970 .releasedir = lxcfs_releasedir,
2971
2972 .fsyncdir = NULL,
2973 .init = NULL,
2974 .destroy = NULL,
2975 .access = NULL,
2976 .create = NULL,
2977 .ftruncate = NULL,
2978 .fgetattr = NULL,
2979 };
2980
/*
 * Print the command-line synopsis to stderr and terminate the process
 * with exit status 1.  Does not return.
 *
 * @me: program name to show in the synopsis (main() passes argv[0]).
 */
static void usage(const char *me)
{
	FILE *out = stderr;

	fprintf(out, "Usage:\n");
	fprintf(out, "\n");
	fprintf(out, "%s mountpoint\n", me);
	fprintf(out, "%s -h\n", me);

	exit(1);
}
2989
/*
 * Report whether @w is one of the accepted spellings of the help
 * request: "-h", "--help", "-help" or "help".  The comparison is exact
 * and case-sensitive.
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = {
		"-h",
		"--help",
		"-help",
		"help",
	};
	size_t k;

	for (k = 0; k < sizeof(spellings) / sizeof(spellings[0]); k++) {
		if (strcmp(w, spellings[k]) == 0)
			return true;
	}

	return false;
}
2999
/*
 * Remove the first occurrence of the argument @which from @argv.
 *
 * @argcp: pointer to the argument count; decremented when an entry is
 *         removed.
 * @argv:  NULL-terminated argument vector; argv[0] is never examined.
 * @which: exact string to look for.
 *
 * The entries after the match, including the terminating NULL, are
 * shifted down by one slot.  Nothing changes if @which is not present.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	char **slot = &argv[1];

	/* Locate the first matching entry, if any. */
	while (*slot && strcmp(*slot, which) != 0)
		slot++;
	if (!*slot)
		return;

	/* Close the gap; the last copy moves the NULL terminator. */
	for (; *slot; slot++)
		*slot = slot[1];

	(*argcp)--;
}
3014
/*
 * Remove the first "@opt @v" pair from @argv.
 *
 * @argcp: pointer to the argument count; reduced by two when a pair is
 *         removed.
 * @argv:  NULL-terminated argument vector; argv[0] is never examined.
 * @opt:   option switch to look for (main() passes "-o").
 * @v:     the only value accepted after @opt.
 *
 * If @opt is found followed by a value other than @v, the offending
 * value is reported on stderr and the process exits with status 1.
 * An @opt that is the very last argument (no value) is left in place.
 * Nothing changes if @opt is not present.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	/* A pair needs two entries, so stop once argv[i+1] is the terminator. */
	for (i = 1; argv[i] && argv[i+1]; i++) {
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i+1], v) != 0) {
			/* Name the value we actually got, not the one we expected. */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i+1]);
			exit(1);
		}
		/*
		 * Shift everything after the pair down by two slots.  The
		 * loop stops right after the NULL terminator has been copied,
		 * so argv[i+2] never reads past the end of the vector.
		 */
		for (; argv[i+1]; i++)
			argv[i] = argv[i+2];
		(*argcp) -= 2;
		return;
	}
}
3035
3036 int main(int argc, char *argv[])
3037 {
3038 int ret = -1;
3039 /*
3040 * what we pass to fuse_main is:
3041 * argv[0] -s -f -o allow_other,directio argv[1] NULL
3042 */
3043 int nargs = 5, cnt = 0;
3044 char *newargv[6];
3045
3046 #ifdef FORTRAVIS
3047 /* for travis which runs on 12.04 */
3048 if (glib_check_version (2, 36, 0) != NULL)
3049 g_type_init ();
3050 #endif
3051
3052 /* accomodate older init scripts */
3053 swallow_arg(&argc, argv, "-s");
3054 swallow_arg(&argc, argv, "-f");
3055 swallow_option(&argc, argv, "-o", "allow_other");
3056
3057 if (argc == 2 && strcmp(argv[1], "--version") == 0) {
3058 fprintf(stderr, "%s\n", VERSION);
3059 exit(0);
3060 }
3061 if (argc != 2 || is_help(argv[1]))
3062 usage(argv[0]);
3063
3064 newargv[cnt++] = argv[0];
3065 newargv[cnt++] = "-f";
3066 newargv[cnt++] = "-o";
3067 newargv[cnt++] = "allow_other,direct_io";
3068 newargv[cnt++] = argv[1];
3069 newargv[cnt++] = NULL;
3070
3071 if (!cgfs_setup_controllers())
3072 goto out;
3073
3074 ret = fuse_main(nargs, newargv, &lxcfs_ops, NULL);
3075
3076 out:
3077 return ret;
3078 }