]> git.proxmox.com Git - mirror_lxcfs.git/blob - lxcfs.c
Don't use tasks file to determine access rights to its cgroup
[mirror_lxcfs.git] / lxcfs.c
1 /* lxcfs
2 *
3 * Copyright © 2014,2015 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
6 * See COPYING file for details.
7 */
8
9 #define FUSE_USE_VERSION 26
10
11 #include <stdio.h>
12 #include <dirent.h>
13 #include <fcntl.h>
14 #include <fuse.h>
15 #include <unistd.h>
16 #include <errno.h>
17 #include <stdbool.h>
18 #include <time.h>
19 #include <string.h>
20 #include <stdlib.h>
21 #include <libgen.h>
22 #include <sched.h>
23 #include <linux/sched.h>
24 #include <sys/socket.h>
25 #include <sys/mount.h>
26 #include <sys/epoll.h>
27 #include <wait.h>
28
29 #ifdef FORTRAVIS
30 #define GLIB_DISABLE_DEPRECATION_WARNINGS
31 #include <glib-object.h>
32 #endif
33
34 #include "cgfs.h"
35 #include "config.h" // for VERSION
36
/*
 * Kind of object a struct file_info handle refers to; stored in
 * file_info.type and checked by the read/readdir handlers.
 */
enum {
	LXC_TYPE_CGDIR = 0,
	LXC_TYPE_CGFILE = 1,
	LXC_TYPE_PROC_MEMINFO = 2,
	LXC_TYPE_PROC_CPUINFO = 3,
	LXC_TYPE_PROC_UPTIME = 4,
	LXC_TYPE_PROC_STAT = 5,
	LXC_TYPE_PROC_DISKSTATS = 6,
};
46
/*
 * Per-handle state kept in fuse_file_info->fh between an open/opendir
 * call and the matching release call.
 */
struct file_info {
	char *controller;	/* heap copy, freed on release */
	char *cgroup;		/* heap copy, freed on release */
	char *file;		/* heap copy or NULL, freed on release */
	int type;		/* one of the LXC_TYPE_* values */
	char *buf;		/* unused as of yet */
	int buflen;
	int size;		/* actual data size */
	int cached;
};
57
/* reserve buffer size, for cpuall in /proc/stat */
#define BUF_RESERVE_SIZE 256

/*
 * Append "<pid>\n" to the heap string *src, growing the allocation by
 * BUF_RESERVE_SIZE when the new text (plus terminator) would not fit.
 *
 * src: address of the string to append to; *src may be NULL initially.
 * sz:  number of characters stored so far, not counting the trailing \0.
 * asz: number of bytes currently allocated for *src.
 * pid: the pid to append.
 *
 * Never fails: the reallocation is retried until it succeeds.
 */
static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
{
	char text[30];
	int textlen = sprintf(text, "%d\n", (int)pid);

	if (*src == NULL || textlen + *sz + 1 >= *asz) {
		char *grown = NULL;

		while (grown == NULL)
			grown = realloc(*src, *asz + BUF_RESERVE_SIZE);
		*src = grown;
		*asz += BUF_RESERVE_SIZE;
	}

	memcpy(*src + *sz, text, textlen);
	*sz += textlen;
	(*src)[*sz] = '\0';
}
86
87 static pid_t get_init_pid_for_task(pid_t task);
88
/*
 * Reap child @pid, retrying when waitpid() is interrupted or reports a
 * different pid.
 * Returns 0 only if the child exited normally with status 0, -1 otherwise
 * (including when waitpid() itself fails).
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			break;
	}

	return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
}
106
/*
 * Translate @in_id, an id valid in the caller's namespace, through the
 * already-open /proc/pid/{u,g}id_map stream @idfile.
 *
 * The stream is rewound first, so the same FILE may be used for several
 * lookups. Lines that do not parse as three unsigned numbers are skipped.
 *
 * Returns the id as seen inside the namespace the map belongs to, or
 * (unsigned)-1 if no range contains @in_id or a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char line[400];

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, sizeof(line), idfile) != NULL) {
		unsigned int ns_base;	/* first id of the range inside the namespace */
		unsigned int host_base;	/* first id of the range in the caller's namespace */
		unsigned int range_len;	/* number of ids in the range */

		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range_len) != 3)
			continue;

		if (host_base + range_len < host_base || ns_base + range_len < ns_base) {
			/*
			 * The range wraps around - unexpected from a procfile,
			 * so give up.
			 */
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				ns_base, host_base, range_len, line);
			return -1;
		}

		if (in_id < host_base || in_id >= host_base + range_len)
			continue;

		/*
		 * host_base <= in_id < host_base + range_len and neither range
		 * wraps, so ns_base + (in_id - host_base) cannot wrap either.
		 */
		return (in_id - host_base) + ns_base;
	}

	/* no range matched */
	return -1;
}
150
/*
 * Values for the req_ns_root argument of is_privileged_over(): whether
 * the calling uid is required to be root in its own namespace.
 */
#define NS_ROOT_REQD true
#define NS_ROOT_OPT false

#define PROCLEN 100

/*
 * Decide whether @uid (the uid of task @pid) has privilege over files
 * owned by @victim.
 *
 * - If either id is -1, the answer is no.
 * - If root-in-namespace is not required, owning the file is enough
 *   (i.e. uid 1000 has access to files owned by uid 1000).
 * - Otherwise @uid must map to 0 in @pid's uid_map and @victim must be
 *   mapped in it as well.
 */
static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
{
	char map_path[PROCLEN];
	FILE *map;
	bool privileged;
	int len;

	if (victim == -1 || uid == -1)
		return false;

	if (!req_ns_root && uid == victim)
		return true;

	len = snprintf(map_path, PROCLEN, "/proc/%d/uid_map", pid);
	if (len < 0 || len >= PROCLEN)
		return false;

	map = fopen(map_path, "r");
	if (!map)
		return false;

	/*
	 * The caller must be root in its namespace, and the victim must be
	 * mapped into that namespace. (The second check may be redundant,
	 * as the ids fuse hands over have already been converted by the vfs.)
	 */
	privileged = convert_id_to_ns(map, uid) == 0 &&
		     convert_id_to_ns(map, victim) != -1;

	fclose(map);
	return privileged;
}
206
/*
 * Check whether the "other" permission bits of @fmode grant the access
 * requested by the open(2)-style flags in @req_mode. Callers shift the
 * owner or group bits down into the "other" position before calling.
 * Returns false for an access mode that is none of O_RDONLY, O_WRONLY
 * and O_RDWR.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
226
227
/*
 * Return the first path component of @taskcg that lies below @querycg.
 *
 * taskcg is a/b/c/d/e
 * querycg is a/b/c
 * we return 'd'
 *
 * When @querycg is "/", @taskcg is expected to carry a leading '/', and
 * the first component after it is returned.
 *
 * @taskcg must be strictly longer than @querycg; otherwise an error is
 * logged and NULL is returned. NULL is also returned when memory cannot
 * be allocated. The result is heap-allocated and owned by the caller.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	const char *component, *slash;
	size_t complen;
	char *next;

	if (strlen(taskcg) <= strlen(querycg)) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* skip the query prefix and the '/' that follows it */
	component = taskcg + 1;
	if (strcmp(querycg, "/") != 0)
		component = taskcg + strlen(querycg) + 1;

	slash = strchr(component, '/');
	complen = slash ? (size_t)(slash - component) : strlen(component);

	next = malloc(complen + 1);
	if (!next)
		return NULL;
	memcpy(next, component, complen);
	next[complen] = '\0';
	return next;
}
253
/* Remove a single trailing '\n' from @x in place, if there is one. */
static void stripnewline(char *x)
{
	size_t len = strlen(x);

	if (len == 0)
		return;
	if (x[len - 1] == '\n')
		x[len - 1] = '\0';
}
260
261 static char *get_pid_cgroup(pid_t pid, const char *contrl)
262 {
263 char fnam[PROCLEN];
264 FILE *f;
265 char *answer = NULL;
266 char *line = NULL;
267 size_t len = 0;
268 int ret;
269 const char *h = find_mounted_controller(contrl);
270 if (!h)
271 return NULL;
272
273 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
274 if (ret < 0 || ret >= PROCLEN)
275 return NULL;
276 if (!(f = fopen(fnam, "r")))
277 return NULL;
278
279 while (getline(&line, &len, f) != -1) {
280 char *c1, *c2;
281 if (!line[0])
282 continue;
283 c1 = strchr(line, ':');
284 if (!c1)
285 goto out;
286 c1++;
287 c2 = strchr(c1, ':');
288 if (!c2)
289 goto out;
290 *c2 = '\0';
291 if (strcmp(c1, h) != 0)
292 continue;
293 c2++;
294 stripnewline(c2);
295 do {
296 answer = strdup(c2);
297 } while (!answer);
298 break;
299 }
300
301 out:
302 fclose(f);
303 free(line);
304 return answer;
305 }
306
307 /*
308 * check whether a fuse context may access a cgroup dir or file
309 *
310 * If file is not null, it is a cgroup file to check under cg.
311 * If file is null, then we are checking perms on cg itself.
312 *
313 * For files we can check the mode of the list_keys result.
314 * For cgroups, we must make assumptions based on the files under the
315 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
316 * yet.
317 */
318 static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
319 {
320 struct cgfs_files *k = NULL;
321 bool ret = false;
322
323 k = cgfs_get_key(contrl, cg, file);
324 if (!k)
325 return false;
326
327 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
328 if (perms_include(k->mode >> 6, mode)) {
329 ret = true;
330 goto out;
331 }
332 }
333 if (fc->gid == k->gid) {
334 if (perms_include(k->mode >> 3, mode)) {
335 ret = true;
336 goto out;
337 }
338 }
339 ret = perms_include(k->mode, mode);
340
341 out:
342 free_key(k);
343 return ret;
344 }
345
#define INITSCOPE "/init.scope"
/*
 * Strip a trailing "/init.scope" component from the cgroup path @cg,
 * in place. If the path is exactly "/init.scope" it becomes "/".
 * Paths that do not end in "/init.scope" are left untouched.
 */
static void prune_init_slice(char *cg)
{
	size_t cg_len = strlen(cg);
	size_t scope_len = strlen(INITSCOPE);
	char *point;

	/*
	 * Compare lengths before doing any pointer arithmetic: computing
	 * cg + cg_len - scope_len for a short string would form a pointer
	 * before the start of the buffer, which is undefined behaviour.
	 */
	if (cg_len < scope_len)
		return;

	point = cg + (cg_len - scope_len);
	if (strcmp(point, INITSCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';	/* keep the leading '/' */
	else
		*point = '\0';
}
360
/*
 * Check whether the cgroup of task @pid (for controller @contrl) is a
 * string prefix of @cg, i.e. whether the caller sits at or above @cg.
 *
 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
 * If caller is in /a, he may act on /a/b, but not on /b.
 *
 * Callers pass "/" for the root cgroup and otherwise a cgroup without a
 * leading '/'; the task's cgroup is compared accordingly.
 *
 * If the answer is false and @nextcg is not NULL, *nextcg is set to the
 * result of get_next_cgroup_dir() (possibly NULL), which the caller must
 * free. *nextcg is not touched if the task's cgroup cannot be determined.
 */
static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
{
	char *taskcg = get_pid_cgroup(pid, contrl);
	char *cmp;
	bool inside;

	if (!taskcg)
		return false;
	prune_init_slice(taskcg);

	cmp = (*cg == '/') ? taskcg : taskcg + 1;
	inside = strncmp(cmp, cg, strlen(cmp)) == 0;
	if (!inside && nextcg)
		*nextcg = get_next_cgroup_dir(cmp, cg);

	free(taskcg);
	return inside;
}
395
/*
 * Decide whether task @pid may see that the cgroup directory @cg exists.
 *
 * If caller is in /a/b/c, he may see that /a exists, but not /b or /a/c.
 * The root ("/") is always visible, a task in the root cgroup sees
 * everything, and otherwise @cg must be the task's own cgroup, one of
 * its ancestors, or one of its descendants.
 */
static bool caller_may_see_dir(pid_t pid, const char *contrl, const char *cg)
{
	char *full, *task_cg;
	size_t target_len, task_len;
	bool visible;

	if (strcmp(cg, "/") == 0)
		return true;

	full = get_pid_cgroup(pid, contrl);
	if (!full)
		return false;
	prune_init_slice(full);

	/* drop the leading '/', since @cg is given without one */
	task_cg = full + 1;
	target_len = strlen(cg);
	task_len = strlen(task_cg);

	if (task_len == 0) {
		/*
		 * Task is in the root cgroup. The prefix tests below look for
		 * a '/' separator, which was just chopped off, so handle this
		 * case explicitly.
		 */
		visible = true;
	} else if (strcmp(cg, task_cg) == 0) {
		visible = true;
	} else if (target_len < task_len) {
		/* looking up a parent dir */
		visible = strncmp(task_cg, cg, target_len) == 0 &&
			  task_cg[target_len] == '/';
	} else if (target_len > task_len) {
		/* looking up a child dir */
		visible = strncmp(task_cg, cg, task_len) == 0 &&
			  cg[task_len] == '/';
	} else {
		visible = false;
	}

	free(full);
	return visible;
}
446
447 /*
448 * given /cgroup/freezer/a/b, return "freezer".
449 * the returned char* should NOT be freed.
450 */
451 static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
452 {
453 const char *p1;
454 char *contr, *slash;
455
456 if (strlen(path) < 9)
457 return NULL;
458 if (*(path+7) != '/')
459 return NULL;
460 p1 = path+8;
461 contr = strdupa(p1);
462 if (!contr)
463 return NULL;
464 slash = strstr(contr, "/");
465 if (slash)
466 *slash = '\0';
467
468 int i;
469 for (i = 0; i < num_hierarchies; i++) {
470 if (hierarchies[i] && strcmp(hierarchies[i], contr) == 0)
471 return hierarchies[i];
472 }
473 return NULL;
474 }
475
/*
 * Find the start of the cgroup in /cgroup/controller/the/cgroup/path.
 * Returns a pointer into @path just past the '/' that ends the controller
 * name, or NULL if @path is shorter than 9 characters or has no such '/'.
 * Note that the returned value may include files (keynames) etc.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *sep;

	if (strlen(path) < 9)
		return NULL;

	sep = strchr(path + 8, '/');
	return sep ? sep + 1 : NULL;
}
491
/*
 * Split the last path element from the path in @cg.
 *
 * @dir receives a newly allocated copy of @cg cut at its last '/', and
 * must be freed by the caller. @last is set to point at that last '/'
 * inside @cg itself (so it includes the slash) and must not be freed.
 * If @cg contains no '/', @dir is a full copy and @last is NULL.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **last)
{
	size_t len = strlen(cg);
	char *copy;

	/* keep retrying until the allocation succeeds */
	while ((copy = malloc(len + 1)) == NULL)
		;
	memcpy(copy, cg, len + 1);
	*dir = copy;

	*last = strrchr(cg, '/');
	if (*last != NULL)
		copy[*last - cg] = '\0';
}
511
512 /*
513 * FUSE ops for /cgroup
514 */
515
516 static int cg_getattr(const char *path, struct stat *sb)
517 {
518 struct timespec now;
519 struct fuse_context *fc = fuse_get_context();
520 char * cgdir = NULL;
521 char *last = NULL, *path1, *path2;
522 struct cgfs_files *k = NULL;
523 const char *cgroup;
524 const char *controller = NULL;
525 int ret = -ENOENT;
526
527
528 if (!fc)
529 return -EIO;
530
531 memset(sb, 0, sizeof(struct stat));
532
533 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
534 return -EINVAL;
535
536 sb->st_uid = sb->st_gid = 0;
537 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
538 sb->st_size = 0;
539
540 if (strcmp(path, "/cgroup") == 0) {
541 sb->st_mode = S_IFDIR | 00755;
542 sb->st_nlink = 2;
543 return 0;
544 }
545
546 controller = pick_controller_from_path(fc, path);
547 if (!controller)
548 return -EIO;
549 cgroup = find_cgroup_in_path(path);
550 if (!cgroup) {
551 /* this is just /cgroup/controller, return it as a dir */
552 sb->st_mode = S_IFDIR | 00755;
553 sb->st_nlink = 2;
554 return 0;
555 }
556
557 get_cgdir_and_path(cgroup, &cgdir, &last);
558
559 if (!last) {
560 path1 = "/";
561 path2 = cgdir;
562 } else {
563 path1 = cgdir;
564 path2 = last;
565 }
566
567 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
568 * Then check that caller's cgroup is under path if last is a child
569 * cgroup, or cgdir if last is a file */
570
571 if (is_child_cgroup(controller, path1, path2)) {
572 if (!caller_may_see_dir(fc->pid, controller, cgroup)) {
573 ret = -ENOENT;
574 goto out;
575 }
576 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
577 /* this is just /cgroup/controller, return it as a dir */
578 sb->st_mode = S_IFDIR | 00555;
579 sb->st_nlink = 2;
580 ret = 0;
581 goto out;
582 }
583 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
584 ret = -EACCES;
585 goto out;
586 }
587
588 // get uid, gid, from '/tasks' file and make up a mode
589 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
590 sb->st_mode = S_IFDIR | 00755;
591 k = cgfs_get_key(controller, cgroup, NULL);
592 if (!k) {
593 sb->st_uid = sb->st_gid = 0;
594 } else {
595 sb->st_uid = k->uid;
596 sb->st_gid = k->gid;
597 }
598 free_key(k);
599 sb->st_nlink = 2;
600 ret = 0;
601 goto out;
602 }
603
604 if ((k = cgfs_get_key(controller, path1, path2)) != NULL) {
605 sb->st_mode = S_IFREG | k->mode;
606 sb->st_nlink = 1;
607 sb->st_uid = k->uid;
608 sb->st_gid = k->gid;
609 sb->st_size = 0;
610 free_key(k);
611 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
612 ret = -ENOENT;
613 goto out;
614 }
615 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) {
616 ret = -EACCES;
617 goto out;
618 }
619
620 ret = 0;
621 }
622
623 out:
624 free(cgdir);
625 return ret;
626 }
627
628 static int cg_opendir(const char *path, struct fuse_file_info *fi)
629 {
630 struct fuse_context *fc = fuse_get_context();
631 const char *cgroup;
632 struct file_info *dir_info;
633 char *controller = NULL;
634
635 if (!fc)
636 return -EIO;
637
638 if (strcmp(path, "/cgroup") == 0) {
639 cgroup = NULL;
640 controller = NULL;
641 } else {
642 // return list of keys for the controller, and list of child cgroups
643 controller = pick_controller_from_path(fc, path);
644 if (!controller)
645 return -EIO;
646
647 cgroup = find_cgroup_in_path(path);
648 if (!cgroup) {
649 /* this is just /cgroup/controller, return its contents */
650 cgroup = "/";
651 }
652 }
653
654 if (cgroup) {
655 if (!caller_may_see_dir(fc->pid, controller, cgroup))
656 return -ENOENT;
657 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
658 return -EACCES;
659 }
660
661 /* we'll free this at cg_releasedir */
662 dir_info = malloc(sizeof(*dir_info));
663 if (!dir_info)
664 return -ENOMEM;
665 dir_info->controller = must_copy_string(controller);
666 dir_info->cgroup = must_copy_string(cgroup);
667 dir_info->type = LXC_TYPE_CGDIR;
668 dir_info->buf = NULL;
669 dir_info->file = NULL;
670 dir_info->buflen = 0;
671
672 fi->fh = (unsigned long)dir_info;
673 return 0;
674 }
675
676 static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
677 struct fuse_file_info *fi)
678 {
679 struct file_info *d = (struct file_info *)fi->fh;
680 struct cgfs_files **list = NULL;
681 int i, ret;
682 char *nextcg = NULL;
683 struct fuse_context *fc = fuse_get_context();
684 char **clist = NULL;
685
686 if (d->type != LXC_TYPE_CGDIR) {
687 fprintf(stderr, "Internal error: file cache info used in readdir\n");
688 return -EIO;
689 }
690 if (!d->cgroup && !d->controller) {
691 // ls /var/lib/lxcfs/cgroup - just show list of controllers
692 int i;
693
694 for (i = 0; i < num_hierarchies; i++) {
695 if (hierarchies[i] && filler(buf, hierarchies[i], NULL, 0) != 0) {
696 return -EIO;
697 }
698 }
699 return 0;
700 }
701
702 if (!cgfs_list_keys(d->controller, d->cgroup, &list)) {
703 // not a valid cgroup
704 ret = -EINVAL;
705 goto out;
706 }
707
708 if (!caller_is_in_ancestor(fc->pid, d->controller, d->cgroup, &nextcg)) {
709 if (nextcg) {
710 int ret;
711 ret = filler(buf, nextcg, NULL, 0);
712 free(nextcg);
713 if (ret != 0) {
714 ret = -EIO;
715 goto out;
716 }
717 }
718 ret = 0;
719 goto out;
720 }
721
722 for (i = 0; list[i]; i++) {
723 if (filler(buf, list[i]->name, NULL, 0) != 0) {
724 ret = -EIO;
725 goto out;
726 }
727 }
728
729 // now get the list of child cgroups
730
731 if (!cgfs_list_children(d->controller, d->cgroup, &clist)) {
732 ret = 0;
733 goto out;
734 }
735 for (i = 0; clist[i]; i++) {
736 if (filler(buf, clist[i], NULL, 0) != 0) {
737 ret = -EIO;
738 goto out;
739 }
740 }
741 ret = 0;
742
743 out:
744 free_keys(list);
745 if (clist) {
746 for (i = 0; clist[i]; i++)
747 free(clist[i]);
748 free(clist);
749 }
750 return ret;
751 }
752
753 static void do_release_file_info(struct file_info *f)
754 {
755 if (!f)
756 return;
757 free(f->controller);
758 free(f->cgroup);
759 free(f->file);
760 free(f->buf);
761 free(f);
762 }
763
764 static int cg_releasedir(const char *path, struct fuse_file_info *fi)
765 {
766 struct file_info *d = (struct file_info *)fi->fh;
767
768 do_release_file_info(d);
769 return 0;
770 }
771
772 static int cg_open(const char *path, struct fuse_file_info *fi)
773 {
774 const char *cgroup;
775 char *last = NULL, *path1, *path2, * cgdir = NULL, *controller;
776 struct cgfs_files *k = NULL;
777 struct file_info *file_info;
778 struct fuse_context *fc = fuse_get_context();
779 int ret;
780
781 if (!fc)
782 return -EIO;
783
784 controller = pick_controller_from_path(fc, path);
785 if (!controller)
786 return -EIO;
787 cgroup = find_cgroup_in_path(path);
788 if (!cgroup)
789 return -EINVAL;
790
791 get_cgdir_and_path(cgroup, &cgdir, &last);
792 if (!last) {
793 path1 = "/";
794 path2 = cgdir;
795 } else {
796 path1 = cgdir;
797 path2 = last;
798 }
799
800 k = cgfs_get_key(controller, path1, path2);
801 if (!k) {
802 ret = -EINVAL;
803 goto out;
804 }
805 free_key(k);
806
807 if (!caller_may_see_dir(fc->pid, controller, path1)) {
808 ret = -ENOENT;
809 goto out;
810 }
811 if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
812 // should never get here
813 ret = -EACCES;
814 goto out;
815 }
816
817 /* we'll free this at cg_release */
818 file_info = malloc(sizeof(*file_info));
819 if (!file_info) {
820 ret = -ENOMEM;
821 goto out;
822 }
823 file_info->controller = must_copy_string(controller);
824 file_info->cgroup = must_copy_string(path1);
825 file_info->file = must_copy_string(path2);
826 file_info->type = LXC_TYPE_CGFILE;
827 file_info->buf = NULL;
828 file_info->buflen = 0;
829
830 fi->fh = (unsigned long)file_info;
831 ret = 0;
832
833 out:
834 free(cgdir);
835 return ret;
836 }
837
838 static int cg_release(const char *path, struct fuse_file_info *fi)
839 {
840 struct file_info *f = (struct file_info *)fi->fh;
841
842 do_release_file_info(f);
843 return 0;
844 }
845
#define POLLIN_SET ( EPOLLIN | EPOLLHUP | EPOLLRDHUP )

/*
 * Wait until @sock becomes readable or is hung up.
 *
 * @sock:    file descriptor to watch (socket or pipe end).
 * @timeout: maximum time to wait, in seconds; a negative value waits
 *           indefinitely.
 *
 * Returns true if an event arrived in time, false on timeout or on any
 * epoll failure (which is logged to stderr).
 */
static bool wait_for_sock(int sock, int timeout)
{
	struct epoll_event ev = { 0 };
	int epfd, ret;

	epfd = epoll_create(1);
	if (epfd < 0) {
		fprintf(stderr, "Failed to create epoll socket: %m\n");
		return false;
	}

	ev.events = POLLIN_SET;
	ev.data.fd = sock;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, sock, &ev) < 0) {
		fprintf(stderr, "Failed adding socket to epoll: %m\n");
		close(epfd);
		return false;
	}

	/*
	 * epoll_wait() takes milliseconds, while callers pass seconds
	 * ("give the child 1 second ..."), so convert. Passing the value
	 * through unchanged made every wait a few milliseconds long.
	 */
	ret = epoll_wait(epfd, &ev, 1, timeout < 0 ? -1 : timeout * 1000);
	close(epfd);

	if (ret == 0)
		return false;
	if (ret < 0) {
		fprintf(stderr, "Failure during epoll_wait: %m\n");
		return false;
	}
	return true;
}
878
/*
 * Receive up to @len bytes from @sockfd into @buf without blocking in
 * recv() itself: readiness is first awaited with wait_for_sock(sockfd, 2).
 * Returns the recv() result, or -1 if the socket did not become ready.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	bool ready = wait_for_sock(sockfd, 2);

	return ready ? recv(sockfd, buf, len, MSG_DONTWAIT) : -1;
}
885
886 #define SEND_CREDS_OK 0
887 #define SEND_CREDS_NOTSK 1
888 #define SEND_CREDS_FAIL 2
889 static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
890 {
891 struct msghdr msg = { 0 };
892 struct iovec iov;
893 struct cmsghdr *cmsg;
894 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
895 char buf[1];
896 buf[0] = 'p';
897
898 if (pingfirst) {
899 if (msgrecv(sock, buf, 1) != 1) {
900 fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
901 __func__);
902 return SEND_CREDS_FAIL;
903 }
904 }
905
906 msg.msg_control = cmsgbuf;
907 msg.msg_controllen = sizeof(cmsgbuf);
908
909 cmsg = CMSG_FIRSTHDR(&msg);
910 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
911 cmsg->cmsg_level = SOL_SOCKET;
912 cmsg->cmsg_type = SCM_CREDENTIALS;
913 memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));
914
915 msg.msg_name = NULL;
916 msg.msg_namelen = 0;
917
918 buf[0] = v;
919 iov.iov_base = buf;
920 iov.iov_len = sizeof(buf);
921 msg.msg_iov = &iov;
922 msg.msg_iovlen = 1;
923
924 if (sendmsg(sock, &msg, 0) < 0) {
925 fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
926 strerror(errno));
927 if (errno == 3)
928 return SEND_CREDS_NOTSK;
929 return SEND_CREDS_FAIL;
930 }
931
932 return SEND_CREDS_OK;
933 }
934
935 static bool recv_creds(int sock, struct ucred *cred, char *v)
936 {
937 struct msghdr msg = { 0 };
938 struct iovec iov;
939 struct cmsghdr *cmsg;
940 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
941 char buf[1];
942 int ret;
943 int optval = 1;
944
945 *v = '1';
946
947 cred->pid = -1;
948 cred->uid = -1;
949 cred->gid = -1;
950
951 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
952 fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
953 return false;
954 }
955 buf[0] = '1';
956 if (write(sock, buf, 1) != 1) {
957 fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
958 return false;
959 }
960
961 msg.msg_name = NULL;
962 msg.msg_namelen = 0;
963 msg.msg_control = cmsgbuf;
964 msg.msg_controllen = sizeof(cmsgbuf);
965
966 iov.iov_base = buf;
967 iov.iov_len = sizeof(buf);
968 msg.msg_iov = &iov;
969 msg.msg_iovlen = 1;
970
971 if (!wait_for_sock(sock, 2)) {
972 fprintf(stderr, "Timed out waiting for scm_cred: %s\n",
973 strerror(errno));
974 return false;
975 }
976 ret = recvmsg(sock, &msg, MSG_DONTWAIT);
977 if (ret < 0) {
978 fprintf(stderr, "Failed to receive scm_cred: %s\n",
979 strerror(errno));
980 return false;
981 }
982
983 cmsg = CMSG_FIRSTHDR(&msg);
984
985 if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
986 cmsg->cmsg_level == SOL_SOCKET &&
987 cmsg->cmsg_type == SCM_CREDENTIALS) {
988 memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));
989 }
990 *v = buf[0];
991
992 return true;
993 }
994
995
996 /*
997 * pid_to_ns - reads pids from a ucred over a socket, then writes the
998 * int value back over the socket. This shifts the pid from the
999 * sender's pidns into tpid's pidns.
1000 */
1001 static void pid_to_ns(int sock, pid_t tpid)
1002 {
1003 char v = '0';
1004 struct ucred cred;
1005
1006 while (recv_creds(sock, &cred, &v)) {
1007 if (v == '1')
1008 _exit(0);
1009 if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
1010 _exit(1);
1011 }
1012 _exit(0);
1013 }
1014
/*
 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
 * in your old pidns. Only children which you fork will be in the target
 * pidns. So the pid_to_ns_wrapper does the setns, then forks a child to
 * actually convert pids.
 *
 * Never returns. Exits with 0 if the child acknowledged its start and
 * later exited with status 0, and with 1 on any failure.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

	cpid = fork();
	if (cpid < 0)
		_exit(1);

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		pid_to_ns(sock, tpid);
		_exit(1); // not reached
	}
	// give the child a moment to finish forking and write its ack
	if (!wait_for_sock(cpipe[0], 1))
		_exit(1);
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1')
		_exit(1);

	/*
	 * wait_for_pid() returns 0 on success and -1 on failure, so a
	 * non-zero result is the error case. The test used to be inverted.
	 */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);
}
1068
1069 /*
1070 * To read cgroup files with a particular pid, we will setns into the child
1071 * pidns, open a pipe, fork a child - which will be the first to really be in
1072 * the child ns - which does the cgfs_get_value and writes the data to the pipe.
1073 */
1074 static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
1075 {
1076 int sock[2] = {-1, -1};
1077 char *tmpdata = NULL;
1078 int ret;
1079 pid_t qpid, cpid = -1;
1080 bool answer = false;
1081 char v = '0';
1082 struct ucred cred;
1083 size_t sz = 0, asz = 0;
1084
1085 if (!cgfs_get_value(contrl, cg, file, &tmpdata))
1086 return false;
1087
1088 /*
1089 * Now we read the pids from returned data one by one, pass
1090 * them into a child in the target namespace, read back the
1091 * translated pids, and put them into our to-return data
1092 */
1093
1094 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1095 perror("socketpair");
1096 free(tmpdata);
1097 return false;
1098 }
1099
1100 cpid = fork();
1101 if (cpid == -1)
1102 goto out;
1103
1104 if (!cpid) // child - exits when done
1105 pid_to_ns_wrapper(sock[1], tpid);
1106
1107 char *ptr = tmpdata;
1108 cred.uid = 0;
1109 cred.gid = 0;
1110 while (sscanf(ptr, "%d\n", &qpid) == 1) {
1111 cred.pid = qpid;
1112 ret = send_creds(sock[0], &cred, v, true);
1113
1114 if (ret == SEND_CREDS_NOTSK)
1115 goto next;
1116 if (ret == SEND_CREDS_FAIL)
1117 goto out;
1118
1119 // read converted results
1120 if (!wait_for_sock(sock[0], 2)) {
1121 fprintf(stderr, "%s: timed out waiting for pid from child: %s\n",
1122 __func__, strerror(errno));
1123 goto out;
1124 }
1125 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
1126 fprintf(stderr, "%s: error reading pid from child: %s\n",
1127 __func__, strerror(errno));
1128 goto out;
1129 }
1130 must_strcat_pid(d, &sz, &asz, qpid);
1131 next:
1132 ptr = strchr(ptr, '\n');
1133 if (!ptr)
1134 break;
1135 ptr++;
1136 }
1137
1138 cred.pid = getpid();
1139 v = '1';
1140 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
1141 // failed to ask child to exit
1142 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
1143 __func__, strerror(errno));
1144 goto out;
1145 }
1146
1147 answer = true;
1148
1149 out:
1150 free(tmpdata);
1151 if (cpid != -1)
1152 wait_for_pid(cpid);
1153 if (sock[0] != -1) {
1154 close(sock[0]);
1155 close(sock[1]);
1156 }
1157 return answer;
1158 }
1159
1160 static int cg_read(const char *path, char *buf, size_t size, off_t offset,
1161 struct fuse_file_info *fi)
1162 {
1163 struct fuse_context *fc = fuse_get_context();
1164 struct file_info *f = (struct file_info *)fi->fh;
1165 struct cgfs_files *k = NULL;
1166 char *data = NULL;
1167 int ret, s;
1168 bool r;
1169
1170 if (f->type != LXC_TYPE_CGFILE) {
1171 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
1172 return -EIO;
1173 }
1174
1175 if (offset)
1176 return 0;
1177
1178 if (!fc)
1179 return -EIO;
1180
1181 if (!f->controller)
1182 return -EINVAL;
1183
1184 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
1185 return -EINVAL;
1186 }
1187 free_key(k);
1188
1189
1190 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { // should never get here
1191 ret = -EACCES;
1192 goto out;
1193 }
1194
1195 if (strcmp(f->file, "tasks") == 0 ||
1196 strcmp(f->file, "/tasks") == 0 ||
1197 strcmp(f->file, "/cgroup.procs") == 0 ||
1198 strcmp(f->file, "cgroup.procs") == 0)
1199 // special case - we have to translate the pids
1200 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
1201 else
1202 r = cgfs_get_value(f->controller, f->cgroup, f->file, &data);
1203
1204 if (!r) {
1205 ret = -EINVAL;
1206 goto out;
1207 }
1208
1209 if (!data) {
1210 ret = 0;
1211 goto out;
1212 }
1213 s = strlen(data);
1214 if (s > size)
1215 s = size;
1216 memcpy(buf, data, s);
1217 if (s > 0 && s < size && data[s-1] != '\n')
1218 buf[s++] = '\n';
1219
1220 ret = s;
1221
1222 out:
1223 free(data);
1224 return ret;
1225 }
1226
1227 static void pid_from_ns(int sock, pid_t tpid)
1228 {
1229 pid_t vpid;
1230 struct ucred cred;
1231 char v;
1232 int ret;
1233
1234 cred.uid = 0;
1235 cred.gid = 0;
1236 while (1) {
1237 if (!wait_for_sock(sock, 2)) {
1238 fprintf(stderr, "%s: timeout reading from parent\n", __func__);
1239 _exit(1);
1240 }
1241 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1242 fprintf(stderr, "%s: bad read from parent: %s\n",
1243 __func__, strerror(errno));
1244 _exit(1);
1245 }
1246 if (vpid == -1) // done
1247 break;
1248 v = '0';
1249 cred.pid = vpid;
1250 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
1251 v = '1';
1252 cred.pid = getpid();
1253 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
1254 _exit(1);
1255 }
1256 }
1257 _exit(0);
1258 }
1259
/*
 * Enter the pid namespace of task @tpid and fork a child there to run
 * pid_from_ns() on @sock. (After setns() only forked children are in
 * the new pid namespace, hence the extra fork.)
 *
 * The child acknowledges its start by writing '1' to a pipe. If the ack
 * does not arrive, the child is killed, reaped, and a new one is forked.
 *
 * Never returns. Exits with 0 if the acknowledged child later exited
 * with status 0, and with 1 on any failure.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

	for (;;) {
		cpid = fork();
		if (cpid < 0)
			_exit(1);

		if (!cpid) {
			char b = '1';
			close(cpipe[0]);
			if (write(cpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(cpipe[1]);
			pid_from_ns(sock, tpid);
		}

		// give the child a moment to finish forking and write its ack
		if (wait_for_sock(cpipe[0], 1) &&
		    read(cpipe[0], &v, 1) == sizeof(char) && v == '1')
			break;

		/* no ack: get rid of this child and try again */
		kill(cpid, SIGKILL);
		wait_for_pid(cpid);
	}

	/*
	 * wait_for_pid() returns 0 on success and -1 on failure, so a
	 * non-zero result is the error case. The test used to be inverted.
	 */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);
}
1315
/*
 * Given host @uid, look up the uid it maps to in the user namespace of
 * task @pid and store it in *answer.
 *
 * Returns true if a mapping exists. Returns false if /proc/<pid>/uid_map
 * cannot be opened (*answer is left untouched) or if @uid is not mapped
 * (*answer is then -1).
 */
bool hostuid_to_ns(uid_t uid, pid_t pid, uid_t *answer)
{
	char map_path[400];
	FILE *map;

	sprintf(map_path, "/proc/%d/uid_map", pid);
	map = fopen(map_path, "r");
	if (map == NULL)
		return false;

	*answer = convert_id_to_ns(map, uid);
	fclose(map);

	return *answer != -1;
}
1337
/*
 * get_pid_creds: read the uid and gid of @pid from /proc/@pid/status.
 *
 * Only the first number on the "Uid:" and "Gid:" lines is used (the real
 * id; XXX should we use the effective id here?).
 *
 * @pid: task to inspect
 * @uid: set to the task's uid, or (uid_t)-1 if it could not be determined
 * @gid: set to the task's gid, or (gid_t)-1 if it could not be determined
 *
 * Errors are reported on stderr; parsing stops at the first malformed line.
 */
void get_pid_creds(pid_t pid, uid_t *uid, gid_t *gid)
{
	char line[400];
	unsigned int id;
	FILE *f;

	*uid = -1;
	*gid = -1;

	snprintf(line, sizeof(line), "/proc/%d/status", (int)pid);
	f = fopen(line, "r");
	if (!f) {
		fprintf(stderr, "Error opening %s: %s\n", line, strerror(errno));
		return;
	}

	while (fgets(line, sizeof(line), f)) {
		/*
		 * Parse into a plain unsigned int: "%u" requires exactly that
		 * type, which uid_t/gid_t are not guaranteed to be.
		 */
		if (strncmp(line, "Uid:", 4) == 0) {
			if (sscanf(line + 4, "%u", &id) != 1) {
				fprintf(stderr, "bad uid line for pid %d\n", (int)pid);
				break;
			}
			*uid = id;
		} else if (strncmp(line, "Gid:", 4) == 0) {
			if (sscanf(line + 4, "%u", &id) != 1) {
				fprintf(stderr, "bad gid line for pid %d\n", (int)pid);
				break;
			}
			*gid = id;
		}
	}
	fclose(f);
}
1376
/*
 * Decide whether requestor @r (running as host uid @r_uid) may move
 * victim @v to a new cgroup.  The move is allowed when
 *   - @r and @v are the same task,
 *   - @r is root on the host,
 *   - both tasks are owned by the same uid, or
 *   - @r_uid maps to root in @r's user namespace and @v's uid is mapped
 *     into that namespace as well.
 */
bool may_move_pid(pid_t r, uid_t r_uid, pid_t v)
{
	uid_t v_uid, mapped;
	gid_t v_gid;

	if (r == v || r_uid == 0)
		return true;

	get_pid_creds(v, &v_uid, &v_gid);
	if (v_uid == r_uid)
		return true;

	/* the requestor must be root inside its own user namespace ... */
	if (!hostuid_to_ns(r_uid, r, &mapped) || mapped != 0)
		return false;

	/* ... and the victim's uid must be visible from that namespace */
	return hostuid_to_ns(v_uid, r, &mapped);
}
1402
1403 static bool do_write_pids(pid_t tpid, uid_t tuid, const char *contrl, const char *cg,
1404 const char *file, const char *buf)
1405 {
1406 int sock[2] = {-1, -1};
1407 pid_t qpid, cpid = -1;
1408 FILE *pids_file = NULL;
1409 bool answer = false, fail = false;
1410
1411 pids_file = open_pids_file(contrl, cg);
1412 if (!pids_file)
1413 return false;
1414
1415 /*
1416 * write the pids to a socket, have helper in writer's pidns
1417 * call movepid for us
1418 */
1419 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1420 perror("socketpair");
1421 goto out;
1422 }
1423
1424 cpid = fork();
1425 if (cpid == -1)
1426 goto out;
1427
1428 if (!cpid) { // child
1429 fclose(pids_file);
1430 pid_from_ns_wrapper(sock[1], tpid);
1431 }
1432
1433 const char *ptr = buf;
1434 while (sscanf(ptr, "%d", &qpid) == 1) {
1435 struct ucred cred;
1436 char v;
1437
1438 if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
1439 fprintf(stderr, "%s: error writing pid to child: %s\n",
1440 __func__, strerror(errno));
1441 goto out;
1442 }
1443
1444 if (recv_creds(sock[0], &cred, &v)) {
1445 if (v == '0') {
1446 if (!may_move_pid(tpid, tuid, cred.pid)) {
1447 fail = true;
1448 break;
1449 }
1450 if (fprintf(pids_file, "%d", (int) cred.pid) < 0)
1451 fail = true;
1452 }
1453 }
1454
1455 ptr = strchr(ptr, '\n');
1456 if (!ptr)
1457 break;
1458 ptr++;
1459 }
1460
1461 /* All good, write the value */
1462 qpid = -1;
1463 if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
1464 fprintf(stderr, "Warning: failed to ask child to exit\n");
1465
1466 if (!fail)
1467 answer = true;
1468
1469 out:
1470 if (cpid != -1)
1471 wait_for_pid(cpid);
1472 if (sock[0] != -1) {
1473 close(sock[0]);
1474 close(sock[1]);
1475 }
1476 if (pids_file) {
1477 if (fclose(pids_file) != 0)
1478 answer = false;
1479 }
1480 return answer;
1481 }
1482
1483 int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1484 struct fuse_file_info *fi)
1485 {
1486 struct fuse_context *fc = fuse_get_context();
1487 char *localbuf = NULL;
1488 struct cgfs_files *k = NULL;
1489 struct file_info *f = (struct file_info *)fi->fh;
1490 bool r;
1491
1492 if (f->type != LXC_TYPE_CGFILE) {
1493 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
1494 return -EIO;
1495 }
1496
1497 if (offset)
1498 return 0;
1499
1500 if (!fc)
1501 return -EIO;
1502
1503 localbuf = alloca(size+1);
1504 localbuf[size] = '\0';
1505 memcpy(localbuf, buf, size);
1506
1507 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
1508 size = -EINVAL;
1509 goto out;
1510 }
1511
1512 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) {
1513 size = -EACCES;
1514 goto out;
1515 }
1516
1517 if (strcmp(f->file, "tasks") == 0 ||
1518 strcmp(f->file, "/tasks") == 0 ||
1519 strcmp(f->file, "/cgroup.procs") == 0 ||
1520 strcmp(f->file, "cgroup.procs") == 0)
1521 // special case - we have to translate the pids
1522 r = do_write_pids(fc->pid, fc->uid, f->controller, f->cgroup, f->file, localbuf);
1523 else
1524 r = cgfs_set_value(f->controller, f->cgroup, f->file, localbuf);
1525
1526 if (!r)
1527 size = -EINVAL;
1528
1529 out:
1530 free_key(k);
1531 return size;
1532 }
1533
1534 int cg_chown(const char *path, uid_t uid, gid_t gid)
1535 {
1536 struct fuse_context *fc = fuse_get_context();
1537 char *cgdir = NULL, *last = NULL, *path1, *path2, *controller;
1538 struct cgfs_files *k = NULL;
1539 const char *cgroup;
1540 int ret;
1541
1542 if (!fc)
1543 return -EIO;
1544
1545 if (strcmp(path, "/cgroup") == 0)
1546 return -EINVAL;
1547
1548 controller = pick_controller_from_path(fc, path);
1549 if (!controller)
1550 return -EINVAL;
1551 cgroup = find_cgroup_in_path(path);
1552 if (!cgroup)
1553 /* this is just /cgroup/controller */
1554 return -EINVAL;
1555
1556 get_cgdir_and_path(cgroup, &cgdir, &last);
1557
1558 if (!last) {
1559 path1 = "/";
1560 path2 = cgdir;
1561 } else {
1562 path1 = cgdir;
1563 path2 = last;
1564 }
1565
1566 if (is_child_cgroup(controller, path1, path2)) {
1567 // get uid, gid, from '/tasks' file and make up a mode
1568 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1569 k = cgfs_get_key(controller, cgroup, "tasks");
1570
1571 } else
1572 k = cgfs_get_key(controller, path1, path2);
1573
1574 if (!k) {
1575 ret = -EINVAL;
1576 goto out;
1577 }
1578
1579 /*
1580 * This being a fuse request, the uid and gid must be valid
1581 * in the caller's namespace. So we can just check to make
1582 * sure that the caller is root in his uid, and privileged
1583 * over the file's current owner.
1584 */
1585 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) {
1586 ret = -EACCES;
1587 goto out;
1588 }
1589
1590 ret = cgfs_chown_file(controller, cgroup, uid, gid);
1591
1592 out:
1593 free_key(k);
1594 free(cgdir);
1595
1596 return ret;
1597 }
1598
1599 int cg_chmod(const char *path, mode_t mode)
1600 {
1601 struct fuse_context *fc = fuse_get_context();
1602 char * cgdir = NULL, *last = NULL, *path1, *path2, *controller;
1603 struct cgfs_files *k = NULL;
1604 const char *cgroup;
1605 int ret;
1606
1607 if (!fc)
1608 return -EIO;
1609
1610 if (strcmp(path, "/cgroup") == 0)
1611 return -EINVAL;
1612
1613 controller = pick_controller_from_path(fc, path);
1614 if (!controller)
1615 return -EINVAL;
1616 cgroup = find_cgroup_in_path(path);
1617 if (!cgroup)
1618 /* this is just /cgroup/controller */
1619 return -EINVAL;
1620
1621 get_cgdir_and_path(cgroup, &cgdir, &last);
1622
1623 if (!last) {
1624 path1 = "/";
1625 path2 = cgdir;
1626 } else {
1627 path1 = cgdir;
1628 path2 = last;
1629 }
1630
1631 if (is_child_cgroup(controller, path1, path2)) {
1632 // get uid, gid, from '/tasks' file and make up a mode
1633 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1634 k = cgfs_get_key(controller, cgroup, "tasks");
1635
1636 } else
1637 k = cgfs_get_key(controller, path1, path2);
1638
1639 if (!k) {
1640 ret = -EINVAL;
1641 goto out;
1642 }
1643
1644 /*
1645 * This being a fuse request, the uid and gid must be valid
1646 * in the caller's namespace. So we can just check to make
1647 * sure that the caller is root in his uid, and privileged
1648 * over the file's current owner.
1649 */
1650 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
1651 ret = -EPERM;
1652 goto out;
1653 }
1654
1655 if (!cgfs_chmod_file(controller, cgroup, mode)) {
1656 ret = -EINVAL;
1657 goto out;
1658 }
1659
1660 ret = 0;
1661 out:
1662 free_key(k);
1663 free(cgdir);
1664 return ret;
1665 }
1666
1667 int cg_mkdir(const char *path, mode_t mode)
1668 {
1669 struct fuse_context *fc = fuse_get_context();
1670 char *last = NULL, *path1, *cgdir = NULL, *controller, *next = NULL;
1671 const char *cgroup;
1672 int ret;
1673
1674 if (!fc)
1675 return -EIO;
1676
1677
1678 controller = pick_controller_from_path(fc, path);
1679 if (!controller)
1680 return -EINVAL;
1681
1682 cgroup = find_cgroup_in_path(path);
1683 if (!cgroup)
1684 return -EINVAL;
1685
1686 get_cgdir_and_path(cgroup, &cgdir, &last);
1687 if (!last)
1688 path1 = "/";
1689 else
1690 path1 = cgdir;
1691
1692 if (!caller_is_in_ancestor(fc->pid, controller, path1, &next)) {
1693 if (last && strcmp(next, last) == 0)
1694 ret = -EEXIST;
1695 else
1696 ret = -ENOENT;
1697 goto out;
1698 }
1699
1700 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
1701 ret = -EACCES;
1702 goto out;
1703 }
1704 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
1705 ret = -EACCES;
1706 goto out;
1707 }
1708
1709 ret = cgfs_create(controller, cgroup, fc->uid, fc->gid);
1710
1711 out:
1712 free(cgdir);
1713 free(next);
1714 return ret;
1715 }
1716
1717 static int cg_rmdir(const char *path)
1718 {
1719 struct fuse_context *fc = fuse_get_context();
1720 char *last = NULL, *cgdir = NULL, *controller, *next = NULL;
1721 const char *cgroup;
1722 int ret;
1723
1724 if (!fc)
1725 return -EIO;
1726
1727 controller = pick_controller_from_path(fc, path);
1728 if (!controller)
1729 return -EINVAL;
1730
1731 cgroup = find_cgroup_in_path(path);
1732 if (!cgroup)
1733 return -EINVAL;
1734
1735 get_cgdir_and_path(cgroup, &cgdir, &last);
1736 if (!last) {
1737 ret = -EINVAL;
1738 goto out;
1739 }
1740
1741 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, &next)) {
1742 if (!last || strcmp(next, last) == 0)
1743 ret = -EBUSY;
1744 else
1745 ret = -ENOENT;
1746 goto out;
1747 }
1748
1749 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
1750 ret = -EACCES;
1751 goto out;
1752 }
1753 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
1754 ret = -EACCES;
1755 goto out;
1756 }
1757
1758 if (!cgfs_remove(controller, cgroup)) {
1759 ret = -EINVAL;
1760 goto out;
1761 }
1762
1763 ret = 0;
1764
1765 out:
1766 free(cgdir);
1767 free(next);
1768 return ret;
1769 }
1770
/* Return true when @line begins with the prefix @pref. */
static bool startswith(const char *line, const char *pref)
{
	return strncmp(line, pref, strlen(pref)) == 0;
}
1777
/*
 * Scan the newline-separated text @memstat for the first line starting
 * with "total_cache" and store the number that follows, divided by 1024,
 * in *@v.  *@v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	static const char key[] = "total_cache";
	const size_t keylen = sizeof(key) - 1;
	char *cur = memstat;

	*v = 0;
	while (*cur) {
		char *nl;

		if (strncmp(cur, key, keylen) == 0) {
			sscanf(cur + keylen, "%lu", v);
			*v /= 1024;
			return;
		}

		/* advance to the start of the next line, if any */
		nl = strchr(cur, '\n');
		if (!nl)
			return;
		cur = nl + 1;
	}
}
1795
/*
 * Look up one counter in blkio-style text.
 *
 * @str holds newline-separated lines; the first line that starts with
 * "<major>:<minor> <iotype>" wins and the number following that prefix is
 * stored in *@v.  *@v is 0 when no line matches.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32] = {0};
	size_t keylen;
	char *nl;

	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;
	for (; *str; str = nl + 1) {
		if (strncmp(str, key, keylen) == 0) {
			sscanf(str + keylen, "%lu", v);
			return;
		}
		nl = strchr(str, '\n');
		if (!nl)
			return;
	}
}
1818
1819 static int read_file(const char *path, char *buf, size_t size,
1820 struct file_info *d)
1821 {
1822 size_t linelen = 0, total_len = 0, rv = 0;
1823 char *line = NULL;
1824 char *cache = d->buf;
1825 size_t cache_size = d->buflen;
1826 FILE *f = fopen(path, "r");
1827 if (!f)
1828 return 0;
1829
1830 while (getline(&line, &linelen, f) != -1) {
1831 size_t l = snprintf(cache, cache_size, "%s", line);
1832 if (l < 0) {
1833 perror("Error writing to cache");
1834 rv = 0;
1835 goto err;
1836 }
1837 if (l >= cache_size) {
1838 fprintf(stderr, "Internal error: truncated write to cache\n");
1839 rv = 0;
1840 goto err;
1841 }
1842 if (l < cache_size) {
1843 cache += l;
1844 cache_size -= l;
1845 total_len += l;
1846 } else {
1847 cache += cache_size;
1848 total_len += cache_size;
1849 cache_size = 0;
1850 break;
1851 }
1852 }
1853
1854 d->size = total_len;
1855 if (total_len > size ) total_len = size;
1856
1857 /* read from off 0 */
1858 memcpy(buf, d->buf, total_len);
1859 rv = total_len;
1860 err:
1861 fclose(f);
1862 free(line);
1863 return rv;
1864 }
1865
1866 /*
1867 * FUSE ops for /proc
1868 */
1869
/*
 * Return memory.limit_in_bytes of @cgroup in the "memory" controller,
 * or (unsigned long)-1 when the value cannot be read.
 */
static unsigned long get_memlimit(const char *cgroup)
{
	unsigned long limit = -1;
	char *raw = NULL;

	if (cgfs_get_value("memory", cgroup, "memory.limit_in_bytes", &raw))
		limit = strtoul(raw, NULL, 10);
	free(raw);

	return limit;
}
1882
/*
 * Return the smallest memory limit that applies to @cgroup: the minimum of
 * get_memlimit() over @cgroup itself and each of its ancestors.
 * Ancestors whose limit cannot be read (get_memlimit() == -1) are ignored.
 */
static unsigned long get_min_memlimit(const char *cgroup)
{
	char *copy = strdupa(cgroup);
	unsigned long memlimit = 0, retlimit;

	retlimit = get_memlimit(copy);

	/*
	 * Walk up until dirname() reaches a fixed point.  Absolute paths end
	 * at "/"; a relative path would end at "." and, without the second
	 * test, loop forever.
	 */
	while (strcmp(copy, "/") != 0 && strcmp(copy, ".") != 0) {
		copy = dirname(copy);
		memlimit = get_memlimit(copy);
		if (memlimit != (unsigned long)-1 && memlimit < retlimit)
			retlimit = memlimit;
	}

	return retlimit;
}
1899
1900 static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1901 struct fuse_file_info *fi)
1902 {
1903 struct fuse_context *fc = fuse_get_context();
1904 struct file_info *d = (struct file_info *)fi->fh;
1905 char *cg;
1906 char *memusage_str = NULL, *memstat_str = NULL,
1907 *memswlimit_str = NULL, *memswusage_str = NULL,
1908 *memswlimit_default_str = NULL, *memswusage_default_str = NULL;
1909 unsigned long memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0,
1910 cached = 0, hosttotal = 0;
1911 char *line = NULL;
1912 size_t linelen = 0, total_len = 0, rv = 0;
1913 char *cache = d->buf;
1914 size_t cache_size = d->buflen;
1915 FILE *f = NULL;
1916
1917 if (offset){
1918 if (offset > d->size)
1919 return -EINVAL;
1920 if (!d->cached)
1921 return 0;
1922 int left = d->size - offset;
1923 total_len = left > size ? size: left;
1924 memcpy(buf, cache + offset, total_len);
1925 return total_len;
1926 }
1927
1928 cg = get_pid_cgroup(fc->pid, "memory");
1929 if (!cg)
1930 return read_file("/proc/meminfo", buf, size, d);
1931
1932 memlimit = get_min_memlimit(cg);
1933 if (!cgfs_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
1934 goto err;
1935 if (!cgfs_get_value("memory", cg, "memory.stat", &memstat_str))
1936 goto err;
1937
1938 // Following values are allowed to fail, because swapaccount might be turned
1939 // off for current kernel
1940 if(cgfs_get_value("memory", cg, "memory.memsw.limit_in_bytes", &memswlimit_str) &&
1941 cgfs_get_value("memory", cg, "memory.memsw.usage_in_bytes", &memswusage_str))
1942 {
1943 /* If swapaccounting is turned on, then default value is assumed to be that of cgroup / */
1944 if (!cgfs_get_value("memory", "/", "memory.memsw.limit_in_bytes", &memswlimit_default_str))
1945 goto err;
1946 if (!cgfs_get_value("memory", "/", "memory.memsw.usage_in_bytes", &memswusage_default_str))
1947 goto err;
1948
1949 memswlimit = strtoul(memswlimit_str, NULL, 10);
1950 memswusage = strtoul(memswusage_str, NULL, 10);
1951
1952 if (!strcmp(memswlimit_str, memswlimit_default_str))
1953 memswlimit = 0;
1954 if (!strcmp(memswusage_str, memswusage_default_str))
1955 memswusage = 0;
1956
1957 memswlimit = memswlimit / 1024;
1958 memswusage = memswusage / 1024;
1959 }
1960
1961 memusage = strtoul(memusage_str, NULL, 10);
1962 memlimit /= 1024;
1963 memusage /= 1024;
1964
1965 get_mem_cached(memstat_str, &cached);
1966
1967 f = fopen("/proc/meminfo", "r");
1968 if (!f)
1969 goto err;
1970
1971 while (getline(&line, &linelen, f) != -1) {
1972 size_t l;
1973 char *printme, lbuf[100];
1974
1975 memset(lbuf, 0, 100);
1976 if (startswith(line, "MemTotal:")) {
1977 sscanf(line+14, "%lu", &hosttotal);
1978 if (hosttotal < memlimit)
1979 memlimit = hosttotal;
1980 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
1981 printme = lbuf;
1982 } else if (startswith(line, "MemFree:")) {
1983 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
1984 printme = lbuf;
1985 } else if (startswith(line, "MemAvailable:")) {
1986 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
1987 printme = lbuf;
1988 } else if (startswith(line, "SwapTotal:") && memswlimit > 0) {
1989 snprintf(lbuf, 100, "SwapTotal: %8lu kB\n", memswlimit - memlimit);
1990 printme = lbuf;
1991 } else if (startswith(line, "SwapFree:") && memswlimit > 0 && memswusage > 0) {
1992 snprintf(lbuf, 100, "SwapFree: %8lu kB\n",
1993 (memswlimit - memlimit) - (memswusage - memusage));
1994 printme = lbuf;
1995 } else if (startswith(line, "Buffers:")) {
1996 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
1997 printme = lbuf;
1998 } else if (startswith(line, "Cached:")) {
1999 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
2000 printme = lbuf;
2001 } else if (startswith(line, "SwapCached:")) {
2002 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
2003 printme = lbuf;
2004 } else
2005 printme = line;
2006
2007 l = snprintf(cache, cache_size, "%s", printme);
2008 if (l < 0) {
2009 perror("Error writing to cache");
2010 rv = 0;
2011 goto err;
2012
2013 }
2014 if (l >= cache_size) {
2015 fprintf(stderr, "Internal error: truncated write to cache\n");
2016 rv = 0;
2017 goto err;
2018 }
2019
2020 cache += l;
2021 cache_size -= l;
2022 total_len += l;
2023 }
2024
2025 d->cached = 1;
2026 d->size = total_len;
2027 if (total_len > size ) total_len = size;
2028 memcpy(buf, d->buf, total_len);
2029
2030 rv = total_len;
2031 err:
2032 if (f)
2033 fclose(f);
2034 free(line);
2035 free(cg);
2036 free(memusage_str);
2037 free(memswlimit_str);
2038 free(memswusage_str);
2039 free(memstat_str);
2040 free(memswlimit_default_str);
2041 free(memswusage_default_str);
2042 return rv;
2043 }
2044
/*
 * Fetch cpuset.cpus of cgroup @cg in the "cpuset" controller.
 * Returns a newly allocated string the caller must free, or NULL when the
 * value cannot be read.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgfs_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
2057
2058 bool cpu_in_cpuset(int cpu, const char *cpuset);
2059
/*
 * Given a "processor : N" line from /proc/cpuinfo, report whether cpu N
 * is part of @cpuset.  Lines that do not parse yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;
	bool parsed = sscanf(line, "processor : %d", &cpu) == 1;

	return parsed && cpu_in_cpuset(cpu, cpuset);
}
2068
/*
 * Report whether @line is a "processor : N" line as found in
 * /proc/cpuinfo, i.e. whether a cpu number can be parsed from it.
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1;
}
2080
2081 static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
2082 struct fuse_file_info *fi)
2083 {
2084 struct fuse_context *fc = fuse_get_context();
2085 struct file_info *d = (struct file_info *)fi->fh;
2086 char *cg;
2087 char *cpuset = NULL;
2088 char *line = NULL;
2089 size_t linelen = 0, total_len = 0, rv = 0;
2090 bool am_printing = false;
2091 int curcpu = -1;
2092 char *cache = d->buf;
2093 size_t cache_size = d->buflen;
2094 FILE *f = NULL;
2095
2096 if (offset){
2097 if (offset > d->size)
2098 return -EINVAL;
2099 if (!d->cached)
2100 return 0;
2101 int left = d->size - offset;
2102 total_len = left > size ? size: left;
2103 memcpy(buf, cache + offset, total_len);
2104 return total_len;
2105 }
2106
2107 cg = get_pid_cgroup(fc->pid, "cpuset");
2108 if (!cg)
2109 return read_file("proc/cpuinfo", buf, size, d);
2110
2111 cpuset = get_cpuset(cg);
2112 if (!cpuset)
2113 goto err;
2114
2115 f = fopen("/proc/cpuinfo", "r");
2116 if (!f)
2117 goto err;
2118
2119 while (getline(&line, &linelen, f) != -1) {
2120 size_t l;
2121 if (is_processor_line(line)) {
2122 am_printing = cpuline_in_cpuset(line, cpuset);
2123 if (am_printing) {
2124 curcpu ++;
2125 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
2126 if (l < 0) {
2127 perror("Error writing to cache");
2128 rv = 0;
2129 goto err;
2130 }
2131 if (l >= cache_size) {
2132 fprintf(stderr, "Internal error: truncated write to cache\n");
2133 rv = 0;
2134 goto err;
2135 }
2136 if (l < cache_size){
2137 cache += l;
2138 cache_size -= l;
2139 total_len += l;
2140 }else{
2141 cache += cache_size;
2142 total_len += cache_size;
2143 cache_size = 0;
2144 break;
2145 }
2146 }
2147 continue;
2148 }
2149 if (am_printing) {
2150 l = snprintf(cache, cache_size, "%s", line);
2151 if (l < 0) {
2152 perror("Error writing to cache");
2153 rv = 0;
2154 goto err;
2155 }
2156 if (l >= cache_size) {
2157 fprintf(stderr, "Internal error: truncated write to cache\n");
2158 rv = 0;
2159 goto err;
2160 }
2161 if (l < cache_size) {
2162 cache += l;
2163 cache_size -= l;
2164 total_len += l;
2165 } else {
2166 cache += cache_size;
2167 total_len += cache_size;
2168 cache_size = 0;
2169 break;
2170 }
2171 }
2172 }
2173
2174 d->cached = 1;
2175 d->size = total_len;
2176 if (total_len > size ) total_len = size;
2177
2178 /* read from off 0 */
2179 memcpy(buf, d->buf, total_len);
2180 rv = total_len;
2181 err:
2182 if (f)
2183 fclose(f);
2184 free(line);
2185 free(cpuset);
2186 free(cg);
2187 return rv;
2188 }
2189
2190 static int proc_stat_read(char *buf, size_t size, off_t offset,
2191 struct fuse_file_info *fi)
2192 {
2193 struct fuse_context *fc = fuse_get_context();
2194 struct file_info *d = (struct file_info *)fi->fh;
2195 char *cg;
2196 char *cpuset = NULL;
2197 char *line = NULL;
2198 size_t linelen = 0, total_len = 0, rv = 0;
2199 int curcpu = -1; /* cpu numbering starts at 0 */
2200 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
2201 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
2202 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
2203 #define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2204 char cpuall[CPUALL_MAX_SIZE];
2205 /* reserve for cpu all */
2206 char *cache = d->buf + CPUALL_MAX_SIZE;
2207 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
2208 FILE *f = NULL;
2209
2210 if (offset){
2211 if (offset > d->size)
2212 return -EINVAL;
2213 if (!d->cached)
2214 return 0;
2215 int left = d->size - offset;
2216 total_len = left > size ? size: left;
2217 memcpy(buf, d->buf + offset, total_len);
2218 return total_len;
2219 }
2220
2221 cg = get_pid_cgroup(fc->pid, "cpuset");
2222 if (!cg)
2223 return read_file("/proc/stat", buf, size, d);
2224
2225 cpuset = get_cpuset(cg);
2226 if (!cpuset)
2227 goto err;
2228
2229 f = fopen("/proc/stat", "r");
2230 if (!f)
2231 goto err;
2232
2233 //skip first line
2234 if (getline(&line, &linelen, f) < 0) {
2235 fprintf(stderr, "proc_stat_read read first line failed\n");
2236 goto err;
2237 }
2238
2239 while (getline(&line, &linelen, f) != -1) {
2240 size_t l;
2241 int cpu;
2242 char cpu_char[10]; /* That's a lot of cores */
2243 char *c;
2244
2245 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
2246 /* not a ^cpuN line containing a number N, just print it */
2247 l = snprintf(cache, cache_size, "%s", line);
2248 if (l < 0) {
2249 perror("Error writing to cache");
2250 rv = 0;
2251 goto err;
2252 }
2253 if (l >= cache_size) {
2254 fprintf(stderr, "Internal error: truncated write to cache\n");
2255 rv = 0;
2256 goto err;
2257 }
2258 if (l < cache_size) {
2259 cache += l;
2260 cache_size -= l;
2261 total_len += l;
2262 continue;
2263 } else {
2264 //no more space, break it
2265 cache += cache_size;
2266 total_len += cache_size;
2267 cache_size = 0;
2268 break;
2269 }
2270 }
2271
2272 if (sscanf(cpu_char, "%d", &cpu) != 1)
2273 continue;
2274 if (!cpu_in_cpuset(cpu, cpuset))
2275 continue;
2276 curcpu ++;
2277
2278 c = strchr(line, ' ');
2279 if (!c)
2280 continue;
2281 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
2282 if (l < 0) {
2283 perror("Error writing to cache");
2284 rv = 0;
2285 goto err;
2286
2287 }
2288 if (l >= cache_size) {
2289 fprintf(stderr, "Internal error: truncated write to cache\n");
2290 rv = 0;
2291 goto err;
2292 }
2293
2294 cache += l;
2295 cache_size -= l;
2296 total_len += l;
2297
2298 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
2299 &softirq, &steal, &guest) != 9)
2300 continue;
2301 user_sum += user;
2302 nice_sum += nice;
2303 system_sum += system;
2304 idle_sum += idle;
2305 iowait_sum += iowait;
2306 irq_sum += irq;
2307 softirq_sum += softirq;
2308 steal_sum += steal;
2309 guest_sum += guest;
2310 }
2311
2312 cache = d->buf;
2313
2314 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
2315 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
2316 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
2317 memcpy(cache, cpuall, cpuall_len);
2318 cache += cpuall_len;
2319 } else{
2320 /* shouldn't happen */
2321 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
2322 cpuall_len = 0;
2323 }
2324
2325 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
2326 total_len += cpuall_len;
2327 d->cached = 1;
2328 d->size = total_len;
2329 if (total_len > size ) total_len = size;
2330
2331 memcpy(buf, d->buf, total_len);
2332 rv = total_len;
2333
2334 err:
2335 if (f)
2336 fclose(f);
2337 free(line);
2338 free(cpuset);
2339 free(cg);
2340 return rv;
2341 }
2342
/*
 * Return the number of seconds between now and the ctime of the /proc
 * directory of the init task found by get_init_pid_for_task(@pid).
 * Returns 0 when that task cannot be determined or inspected.
 */
static long int getreaperage(pid_t pid)
{
	char procdir[100];
	struct stat st;
	pid_t reaper;
	int n;

	reaper = get_init_pid_for_task(pid);
	if (reaper < 0)
		return 0;

	n = snprintf(procdir, 100, "/proc/%d", reaper);
	if (n < 0 || n >= 100)
		return 0;

	if (lstat(procdir, &st) < 0)
		return 0;

	return time(NULL) - st.st_ctime;
}
2363
2364 /*
2365 * fork a task which switches to @task's namespace and writes '1'.
2366 * over a unix sock so we can read the task's reaper's pid in our
2367 * namespace
2368 */
2369 void write_task_init_pid_exit(int sock, pid_t target)
2370 {
2371 struct ucred cred;
2372 char fnam[100];
2373 pid_t pid;
2374 char v;
2375 int fd, ret;
2376
2377 ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", (int)target);
2378 if (ret < 0 || ret >= sizeof(fnam))
2379 _exit(1);
2380
2381 fd = open(fnam, O_RDONLY);
2382 if (fd < 0) {
2383 perror("write_task_init_pid_exit open of ns/pid");
2384 _exit(1);
2385 }
2386 if (setns(fd, 0)) {
2387 perror("write_task_init_pid_exit setns 1");
2388 close(fd);
2389 _exit(1);
2390 }
2391 pid = fork();
2392 if (pid < 0)
2393 _exit(1);
2394 if (pid != 0) {
2395 wait_for_pid(pid);
2396 _exit(0);
2397 }
2398
2399 /* we are the child */
2400 cred.uid = 0;
2401 cred.gid = 0;
2402 cred.pid = 1;
2403 v = '1';
2404 send_creds(sock, &cred, v, true);
2405 _exit(0);
2406 }
2407
2408 static pid_t get_init_pid_for_task(pid_t task)
2409 {
2410 int sock[2];
2411 pid_t pid;
2412 pid_t ret = -1;
2413 char v = '0';
2414 struct ucred cred;
2415
2416 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
2417 perror("socketpair");
2418 return -1;
2419 }
2420
2421 pid = fork();
2422 if (pid < 0)
2423 goto out;
2424 if (!pid) {
2425 close(sock[1]);
2426 write_task_init_pid_exit(sock[0], task);
2427 }
2428
2429 if (!recv_creds(sock[1], &cred, &v))
2430 goto out;
2431 ret = cred.pid;
2432
2433 out:
2434 close(sock[0]);
2435 close(sock[1]);
2436 wait_for_pid(pid);
2437 return ret;
2438 }
2439
/*
 * Return cpuacct.usage of the cgroup that the init task of @task's pid
 * namespace lives in, divided by 1000000000.  Returns 0 when any step of
 * the lookup fails.
 */
static unsigned long get_reaper_busy(pid_t task)
{
	char *cg = NULL, *raw = NULL;
	unsigned long busy = 0;
	pid_t reaper;

	reaper = get_init_pid_for_task(task);
	if (reaper == -1)
		return 0;

	cg = get_pid_cgroup(reaper, "cpuacct");
	if (cg && cgfs_get_value("cpuacct", cg, "cpuacct.usage", &raw))
		busy = strtoul(raw, NULL, 10) / 1000000000;

	free(cg);
	free(raw);
	return busy;
}
2462
2463 /*
2464 * We read /proc/uptime and reuse its second field.
2465 * For the first field, we use the mtime for the reaper for
2466 * the calling pid as returned by getreaperage
2467 */
2468 static int proc_uptime_read(char *buf, size_t size, off_t offset,
2469 struct fuse_file_info *fi)
2470 {
2471 struct fuse_context *fc = fuse_get_context();
2472 struct file_info *d = (struct file_info *)fi->fh;
2473 long int reaperage = getreaperage(fc->pid);
2474 unsigned long int busytime = get_reaper_busy(fc->pid), idletime;
2475 char *cache = d->buf;
2476 size_t total_len = 0;
2477
2478 if (offset){
2479 if (offset > d->size)
2480 return -EINVAL;
2481 if (!d->cached)
2482 return 0;
2483 int left = d->size - offset;
2484 total_len = left > size ? size: left;
2485 memcpy(buf, cache + offset, total_len);
2486 return total_len;
2487 }
2488
2489 idletime = reaperage - busytime;
2490 if (idletime > reaperage)
2491 idletime = reaperage;
2492
2493 total_len = snprintf(d->buf, d->size, "%ld.0 %lu.0\n", reaperage, idletime);
2494 if (total_len < 0){
2495 perror("Error writing to cache");
2496 return 0;
2497 }
2498
2499 d->size = (int)total_len;
2500 d->cached = 1;
2501
2502 if (total_len > size) total_len = size;
2503
2504 memcpy(buf, d->buf, total_len);
2505 return total_len;
2506 }
2507
2508 static int proc_diskstats_read(char *buf, size_t size, off_t offset,
2509 struct fuse_file_info *fi)
2510 {
2511 char dev_name[72];
2512 struct fuse_context *fc = fuse_get_context();
2513 struct file_info *d = (struct file_info *)fi->fh;
2514 char *cg;
2515 char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
2516 *io_wait_time_str = NULL, *io_service_time_str = NULL;
2517 unsigned long read = 0, write = 0;
2518 unsigned long read_merged = 0, write_merged = 0;
2519 unsigned long read_sectors = 0, write_sectors = 0;
2520 unsigned long read_ticks = 0, write_ticks = 0;
2521 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
2522 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
2523 char *cache = d->buf;
2524 size_t cache_size = d->buflen;
2525 char *line = NULL;
2526 size_t linelen = 0, total_len = 0, rv = 0;
2527 unsigned int major = 0, minor = 0;
2528 int i = 0;
2529 FILE *f = NULL;
2530
2531 if (offset){
2532 if (offset > d->size)
2533 return -EINVAL;
2534 if (!d->cached)
2535 return 0;
2536 int left = d->size - offset;
2537 total_len = left > size ? size: left;
2538 memcpy(buf, cache + offset, total_len);
2539 return total_len;
2540 }
2541
2542 cg = get_pid_cgroup(fc->pid, "blkio");
2543 if (!cg)
2544 return read_file("/proc/diskstats", buf, size, d);
2545
2546 if (!cgfs_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
2547 goto err;
2548 if (!cgfs_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
2549 goto err;
2550 if (!cgfs_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
2551 goto err;
2552 if (!cgfs_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
2553 goto err;
2554 if (!cgfs_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
2555 goto err;
2556
2557
2558 f = fopen("/proc/diskstats", "r");
2559 if (!f)
2560 goto err;
2561
2562 while (getline(&line, &linelen, f) != -1) {
2563 size_t l;
2564 char *printme, lbuf[256];
2565
2566 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
2567 if(i == 3){
2568 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
2569 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
2570 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
2571 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
2572 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
2573 read_sectors = read_sectors/512;
2574 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
2575 write_sectors = write_sectors/512;
2576
2577 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
2578 rd_svctm = rd_svctm/1000000;
2579 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
2580 rd_wait = rd_wait/1000000;
2581 read_ticks = rd_svctm + rd_wait;
2582
2583 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
2584 wr_svctm = wr_svctm/1000000;
2585 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
2586 wr_wait = wr_wait/1000000;
2587 write_ticks = wr_svctm + wr_wait;
2588
2589 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
2590 tot_ticks = tot_ticks/1000000;
2591 }else{
2592 continue;
2593 }
2594
2595 memset(lbuf, 0, 256);
2596 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
2597 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
2598 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
2599 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
2600 printme = lbuf;
2601 } else
2602 continue;
2603
2604 l = snprintf(cache, cache_size, "%s", printme);
2605 if (l < 0) {
2606 perror("Error writing to fuse buf");
2607 rv = 0;
2608 goto err;
2609 }
2610 if (l >= cache_size) {
2611 fprintf(stderr, "Internal error: truncated write to cache\n");
2612 rv = 0;
2613 goto err;
2614 }
2615 cache += l;
2616 cache_size -= l;
2617 total_len += l;
2618 }
2619
2620 d->cached = 1;
2621 d->size = total_len;
2622 if (total_len > size ) total_len = size;
2623 memcpy(buf, d->buf, total_len);
2624
2625 rv = total_len;
2626 err:
2627 free(cg);
2628 if (f)
2629 fclose(f);
2630 free(line);
2631 free(io_serviced_str);
2632 free(io_merged_str);
2633 free(io_service_bytes_str);
2634 free(io_wait_time_str);
2635 free(io_service_time_str);
2636 return rv;
2637 }
2638
/*
 * Return the number of bytes obtained by reading @which line by line.
 * Returns 0 when the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char *linebuf = NULL;
	size_t cap = 0;
	ssize_t total = 0;
	FILE *fp;

	fp = fopen(which, "r");
	if (fp == NULL)
		return 0;

	for (;;) {
		ssize_t got = getline(&linebuf, &cap, fp);

		if (got == -1)
			break;
		total += got;
	}

	/* getline() owns and grows linebuf; release it once at the end */
	free(linebuf);
	fclose(fp);

	return total;
}
2655
/*
 * getattr handler for the /proc subtree.
 *
 * "/proc" itself is reported as a read-only directory; each of the
 * emulated files is reported as a read-only regular file of size 0.
 * All entries are owned by uid/gid 0 and stamped with the current time.
 *
 * Returns 0 on success, -EINVAL if the clock cannot be read, and
 * -ENOENT for any other path.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const proc_files[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	struct timespec ts;
	size_t idx;

	memset(sb, 0, sizeof(*sb));
	if (clock_gettime(CLOCK_REALTIME, &ts) < 0)
		return -EINVAL;

	sb->st_uid = 0;
	sb->st_gid = 0;
	sb->st_atim = ts;
	sb->st_mtim = ts;
	sb->st_ctim = ts;

	if (!strcmp(path, "/proc")) {
		sb->st_nlink = 2;
		sb->st_mode = S_IFDIR | 00555;
		return 0;
	}

	for (idx = 0; idx < sizeof(proc_files) / sizeof(proc_files[0]); idx++) {
		if (strcmp(path, proc_files[idx]) != 0)
			continue;
		sb->st_size = 0;
		sb->st_mode = S_IFREG | 00444;
		sb->st_nlink = 1;
		return 0;
	}

	return -ENOENT;
}
2683
2684 static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2685 struct fuse_file_info *fi)
2686 {
2687 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2688 filler(buf, "meminfo", NULL, 0) != 0 ||
2689 filler(buf, "stat", NULL, 0) != 0 ||
2690 filler(buf, "uptime", NULL, 0) != 0 ||
2691 filler(buf, "diskstats", NULL, 0) != 0)
2692 return -EINVAL;
2693 return 0;
2694 }
2695
2696 static int proc_open(const char *path, struct fuse_file_info *fi)
2697 {
2698 int type = -1;
2699 struct file_info *info;
2700
2701 if (strcmp(path, "/proc/meminfo") == 0)
2702 type = LXC_TYPE_PROC_MEMINFO;
2703 else if (strcmp(path, "/proc/cpuinfo") == 0)
2704 type = LXC_TYPE_PROC_CPUINFO;
2705 else if (strcmp(path, "/proc/uptime") == 0)
2706 type = LXC_TYPE_PROC_UPTIME;
2707 else if (strcmp(path, "/proc/stat") == 0)
2708 type = LXC_TYPE_PROC_STAT;
2709 else if (strcmp(path, "/proc/diskstats") == 0)
2710 type = LXC_TYPE_PROC_DISKSTATS;
2711 if (type == -1)
2712 return -ENOENT;
2713
2714 info = malloc(sizeof(*info));
2715 if (!info)
2716 return -ENOMEM;
2717
2718 memset(info, 0, sizeof(*info));
2719 info->type = type;
2720
2721 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
2722 do {
2723 info->buf = malloc(info->buflen);
2724 } while (!info->buf);
2725 memset(info->buf, 0, info->buflen);
2726 /* set actual size to buffer size */
2727 info->size = info->buflen;
2728
2729 fi->fh = (unsigned long)info;
2730 return 0;
2731 }
2732
2733 static int proc_release(const char *path, struct fuse_file_info *fi)
2734 {
2735 struct file_info *f = (struct file_info *)fi->fh;
2736
2737 do_release_file_info(f);
2738 return 0;
2739 }
2740
2741 static int proc_read(const char *path, char *buf, size_t size, off_t offset,
2742 struct fuse_file_info *fi)
2743 {
2744 struct file_info *f = (struct file_info *) fi->fh;
2745
2746 switch (f->type) {
2747 case LXC_TYPE_PROC_MEMINFO:
2748 return proc_meminfo_read(buf, size, offset, fi);
2749 case LXC_TYPE_PROC_CPUINFO:
2750 return proc_cpuinfo_read(buf, size, offset, fi);
2751 case LXC_TYPE_PROC_UPTIME:
2752 return proc_uptime_read(buf, size, offset, fi);
2753 case LXC_TYPE_PROC_STAT:
2754 return proc_stat_read(buf, size, offset, fi);
2755 case LXC_TYPE_PROC_DISKSTATS:
2756 return proc_diskstats_read(buf, size, offset, fi);
2757 default:
2758 return -EINVAL;
2759 }
2760 }
2761
2762 /*
2763 * FUSE ops for /
2764 * these just delegate to the /proc and /cgroup ops as
2765 * needed
2766 */
2767
/*
 * getattr handler for the whole mount.
 *
 * "/" is reported as a 0755 directory; anything starting with
 * "/cgroup" or "/proc" is delegated to cg_getattr() / proc_getattr().
 *
 * Returns 0 or the delegate's result. Any other path does not exist
 * in this filesystem, so -ENOENT is returned; that way stat() on a
 * missing top-level entry reports "No such file or directory" rather
 * than "Invalid argument".
 */
static int lxcfs_getattr(const char *path, struct stat *sb)
{
	if (strcmp(path, "/") == 0) {
		sb->st_mode = S_IFDIR | 00755;
		sb->st_nlink = 2;
		return 0;
	}
	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_getattr(path, sb);
	if (strncmp(path, "/proc", 5) == 0)
		return proc_getattr(path, sb);

	return -ENOENT;
}
2783
/*
 * opendir handler: "/" and "/proc" need no per-open state, cgroup
 * paths are passed to cg_opendir(), everything else is -ENOENT.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	bool stateless_dir = !strcmp(path, "/") || !strcmp(path, "/proc");

	if (stateless_dir)
		return 0;

	if (!strncmp(path, "/cgroup", 7))
		return cg_opendir(path, fi);

	return -ENOENT;
}
2796
2797 static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2798 struct fuse_file_info *fi)
2799 {
2800 if (strcmp(path, "/") == 0) {
2801 if (filler(buf, "proc", NULL, 0) != 0 ||
2802 filler(buf, "cgroup", NULL, 0) != 0)
2803 return -EINVAL;
2804 return 0;
2805 }
2806 if (strncmp(path, "/cgroup", 7) == 0)
2807 return cg_readdir(path, buf, filler, offset, fi);
2808 if (strcmp(path, "/proc") == 0)
2809 return proc_readdir(path, buf, filler, offset, fi);
2810 return -EINVAL;
2811 }
2812
/*
 * releasedir handler: nothing to free for "/" and "/proc"; cgroup
 * paths are passed to cg_releasedir(). Other paths yield -EINVAL.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	bool nothing_to_release = !strcmp(path, "/") || !strcmp(path, "/proc");

	if (nothing_to_release)
		return 0;

	if (!strncmp(path, "/cgroup", 7))
		return cg_releasedir(path, fi);

	return -EINVAL;
}
2824
/*
 * open handler: route by path prefix to cg_open() or proc_open();
 * -EINVAL for paths outside both subtrees.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;
	bool in_proc = strncmp(path, "/proc", 5) == 0;

	if (in_cgroup)
		return cg_open(path, fi);
	if (in_proc)
		return proc_open(path, fi);

	return -EINVAL;
}
2834
/*
 * read handler: route by path prefix to cg_read() or proc_read();
 * -EINVAL for paths outside both subtrees.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;
	bool in_proc = strncmp(path, "/proc", 5) == 0;

	if (in_cgroup)
		return cg_read(path, buf, size, offset, fi);
	if (in_proc)
		return proc_read(path, buf, size, offset, fi);

	return -EINVAL;
}
2845
/*
 * write handler: only cgroup paths are writable (via cg_write());
 * every other path yields -EINVAL.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
2855
/* flush handler: nothing is buffered on our side, so always succeed. */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
2860
/*
 * release handler: route by path prefix to cg_release() or
 * proc_release(); -EINVAL for paths outside both subtrees.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;
	bool in_proc = strncmp(path, "/proc", 5) == 0;

	if (in_cgroup)
		return cg_release(path, fi);
	if (in_proc)
		return proc_release(path, fi);

	return -EINVAL;
}
2870
/* fsync handler: there is no backing store to sync, so always succeed. */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
2875
/*
 * mkdir handler: directories can only be created below /cgroup
 * (via cg_mkdir()); every other path yields -EINVAL.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
2883
/*
 * chown handler: ownership can only be changed below /cgroup
 * (via cg_chown()); every other path yields -EINVAL.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
2891
/*
 * cat performs a truncate before it calls ops->write. Truncating has
 * no real meaning for cgroup files, so for cgroup paths we report
 * success without doing anything. All other paths get -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	(void)newsize;

	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
2903
/*
 * rmdir handler: directories can only be removed below /cgroup
 * (via cg_rmdir()); every other path yields -EINVAL.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
2910
/*
 * chmod handler: modes can only be changed below /cgroup
 * (via cg_chmod()); every other path yields -EINVAL.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
2917
2918 const struct fuse_operations lxcfs_ops = {
2919 .getattr = lxcfs_getattr,
2920 .readlink = NULL,
2921 .getdir = NULL,
2922 .mknod = NULL,
2923 .mkdir = lxcfs_mkdir,
2924 .unlink = NULL,
2925 .rmdir = lxcfs_rmdir,
2926 .symlink = NULL,
2927 .rename = NULL,
2928 .link = NULL,
2929 .chmod = lxcfs_chmod,
2930 .chown = lxcfs_chown,
2931 .truncate = lxcfs_truncate,
2932 .utime = NULL,
2933
2934 .open = lxcfs_open,
2935 .read = lxcfs_read,
2936 .release = lxcfs_release,
2937 .write = lxcfs_write,
2938
2939 .statfs = NULL,
2940 .flush = lxcfs_flush,
2941 .fsync = lxcfs_fsync,
2942
2943 .setxattr = NULL,
2944 .getxattr = NULL,
2945 .listxattr = NULL,
2946 .removexattr = NULL,
2947
2948 .opendir = lxcfs_opendir,
2949 .readdir = lxcfs_readdir,
2950 .releasedir = lxcfs_releasedir,
2951
2952 .fsyncdir = NULL,
2953 .init = NULL,
2954 .destroy = NULL,
2955 .access = NULL,
2956 .create = NULL,
2957 .ftruncate = NULL,
2958 .fgetattr = NULL,
2959 };
2960
/* Print the command-line synopsis for program @me to stderr and exit(1). */
static void usage(const char *me)
{
	FILE *out = stderr;

	fprintf(out, "Usage:\n");
	fprintf(out, "\n");
	fprintf(out, "%s mountpoint\n", me);
	fprintf(out, "%s -h\n", me);

	exit(1);
}
2969
/* Return true when @w is one of the recognised spellings of "help". */
static bool is_help(char *w)
{
	static const char *const spellings[] = {
		"-h",
		"--help",
		"-help",
		"help",
	};
	size_t k;

	for (k = 0; k < sizeof(spellings) / sizeof(spellings[0]); k++) {
		if (strcmp(w, spellings[k]) == 0)
			return true;
	}

	return false;
}
2979
/*
 * Remove the first occurrence of @which from the NULL-terminated
 * @argv (searching from argv[1]) by shifting the remaining entries,
 * terminator included, down one slot. *@argcp is decremented when an
 * entry was removed; otherwise nothing changes.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	char **slot = &argv[1];

	/* find the first matching argument, if any */
	while (*slot && strcmp(*slot, which) != 0)
		slot++;
	if (!*slot)
		return;

	/* close the gap; the final copy moves the NULL terminator */
	while (*slot) {
		slot[0] = slot[1];
		slot++;
	}

	--*argcp;
}
2994
/*
 * Remove the first "@opt @v" pair from the NULL-terminated @argv
 * (searching from argv[1]) and decrement *@argcp by two.
 *
 * If @opt is found but is followed by anything other than @v, the
 * offending value is reported on stderr and the process exits with
 * status 1. If @opt is not present (or is the last argument), argv
 * and *@argcp are left untouched.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	/* an option needs a value, so stop once no successor is left */
	for (i = 1; argv[i] && argv[i + 1]; i++) {
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i + 1], v) != 0) {
			/* report what was actually passed, not what we expected */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i + 1]);
			exit(1);
		}
		/* shift the tail down two slots, NULL terminator included */
		for (; argv[i + 1]; i++)
			argv[i] = argv[i + 2];
		(*argcp) -= 2;
		return;
	}
}
3015
3016 int main(int argc, char *argv[])
3017 {
3018 int ret = -1;
3019 /*
3020 * what we pass to fuse_main is:
3021 * argv[0] -s -f -o allow_other,directio argv[1] NULL
3022 */
3023 int nargs = 5, cnt = 0;
3024 char *newargv[6];
3025
3026 #ifdef FORTRAVIS
3027 /* for travis which runs on 12.04 */
3028 if (glib_check_version (2, 36, 0) != NULL)
3029 g_type_init ();
3030 #endif
3031
3032 /* accomodate older init scripts */
3033 swallow_arg(&argc, argv, "-s");
3034 swallow_arg(&argc, argv, "-f");
3035 swallow_option(&argc, argv, "-o", "allow_other");
3036
3037 if (argc == 2 && strcmp(argv[1], "--version") == 0) {
3038 fprintf(stderr, "%s\n", VERSION);
3039 exit(0);
3040 }
3041 if (argc != 2 || is_help(argv[1]))
3042 usage(argv[0]);
3043
3044 newargv[cnt++] = argv[0];
3045 newargv[cnt++] = "-f";
3046 newargv[cnt++] = "-o";
3047 newargv[cnt++] = "allow_other,direct_io,entry_timeout=0.5,attr_timeout=0.5";
3048 newargv[cnt++] = argv[1];
3049 newargv[cnt++] = NULL;
3050
3051 if (!cgfs_setup_controllers())
3052 goto out;
3053
3054 ret = fuse_main(nargs, newargv, &lxcfs_ops, NULL);
3055
3056 out:
3057 return ret;
3058 }