]> git.proxmox.com Git - mirror_lxcfs.git/blame - lxcfs.c
fix two threading issues
[mirror_lxcfs.git] / lxcfs.c
CommitLineData
758ad80c
SH
1/* lxcfs
2 *
3 * Copyright © 2014 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
f2799430 6 * See COPYING file for details.
758ad80c
SH
7 */
8
758ad80c
SH
9#define FUSE_USE_VERSION 26
10
2183082c 11#include <stdio.h>
758ad80c
SH
12#include <dirent.h>
13#include <fcntl.h>
14#include <fuse.h>
15#include <unistd.h>
16#include <errno.h>
17#include <stdbool.h>
18#include <time.h>
19#include <string.h>
20#include <stdlib.h>
21#include <libgen.h>
41bb9357
SH
22#include <sched.h>
23#include <linux/sched.h>
a05660a6 24#include <sys/socket.h>
41bb9357
SH
25#include <sys/mount.h>
26#include <wait.h>
758ad80c 27
67bd113f 28#include <nih-dbus/dbus_connection.h>
758ad80c
SH
29#include <nih/alloc.h>
30#include <nih/string.h>
38a76a91 31#include <nih/error.h>
758ad80c
SH
32
33#include "cgmanager.h"
2e9c0b32 34#include "config.h" // for VERSION
758ad80c
SH
35
/* Filesystem-wide state, reached through the FUSE context's private_data. */
struct lxcfs_state {
	/*
	 * a null-terminated, nih-allocated list of the mounted subsystems.  We
	 * detect this at startup.
	 */
	char **subsystems;
};
/* The lxcfs_state attached to the FUSE context of the current request. */
#define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
44
443d13f5
SH
/*
 * Values stored in file_info.type, identifying what kind of object an
 * open handle refers to.
 */
enum {
	LXC_TYPE_CGDIR,		/* a cgroup directory handle (set by cg_opendir) */
	LXC_TYPE_CGFILE,	/* a cgroup file handle (set by cg_open) */
	LXC_TYPE_PROC_MEMINFO,
	LXC_TYPE_PROC_CPUINFO,
	LXC_TYPE_PROC_UPTIME,
	LXC_TYPE_PROC_STAT,
	LXC_TYPE_PROC_DISKSTATS,
};
54
c688e1b3
SH
/*
 * Per-open-handle information.  A pointer to one of these is stashed in
 * fuse_file_info->fh at open/opendir time and freed at release time.
 */
struct file_info {
	char *controller;	/* controller name, or NULL */
	char *cgroup;		/* cgroup path, or NULL */
	char *file;		/* file (key) name; NULL for directory handles */
	int type;		/* one of the LXC_TYPE_* values */
	char *buf;  // unused as of yet
	int buflen;
	int size; //actual data size
};
64
97f1f27b
YY
65/* reserve buffer size, for cpuall in /proc/stat */
66#define BUF_RESERVE_SIZE 256
67
bae07053 68static char *must_copy_string(void *parent, const char *str)
c688e1b3
SH
69{
70 if (!str)
71 return NULL;
bae07053 72 return NIH_MUST( nih_strdup(parent, str) );
c688e1b3
SH
73}
74
4775fba1
SH
/*
 * Reap child pid, retrying when waitpid() is interrupted by a signal.
 *
 * Returns 0 only if the child exited normally with status 0, and -1 if
 * waitpid() failed or the child terminated in any other way.
 *
 * TODO - callers should use the return value to report errors, esp. for
 * read/write of tasks and cgroup.procs
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;
	return -1;
}
96
053a659d
SH
/*
 * Translate an id through an open /proc/pid/{u,g}id_map stream.
 *
 * idfile is rewound and scanned line by line; each usable line holds
 * three unsigned numbers: the base id in the map owner's namespace, the
 * base id in the caller's namespace, and the length of the range.
 * Lines that do not parse are skipped.
 *
 * Returns the id in_id maps to, or -1 (as unsigned int) if no range
 * contains it or a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char line[400];

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, 400, idfile) != NULL) {
		unsigned int ns_base, host_base, range_len;

		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range_len) != 3)
			continue;

		if (host_base + range_len < host_base || ns_base + range_len < ns_base) {
			/* a wrapped range is not expected from a procfile; bail */
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				ns_base, host_base, range_len, line);
			return -1;
		}

		if (in_id < host_base || in_id >= host_base + range_len)
			continue;

		/*
		 * host_base <= in_id < host_base + range_len, and neither
		 * range wraps, so this sum cannot wrap either.
		 */
		return (in_id - host_base) + ns_base;
	}

	/* no range matched */
	return -1;
}
140
341b21ad
SH
141/*
142 * for is_privileged_over,
143 * specify whether we require the calling uid to be root in his
144 * namespace
145 */
146#define NS_ROOT_REQD true
147#define NS_ROOT_OPT false
148
149static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
758ad80c 150{
053a659d
SH
151 nih_local char *fpath = NULL;
152 bool answer = false;
153 uid_t nsuid;
154
341b21ad
SH
155 if (victim == -1 || uid == -1)
156 return false;
157
158 /*
159 * If the request is one not requiring root in the namespace,
160 * then having the same uid suffices. (i.e. uid 1000 has write
161 * access to files owned by uid 1000
162 */
163 if (!req_ns_root && uid == victim)
758ad80c
SH
164 return true;
165
053a659d
SH
166 fpath = NIH_MUST( nih_sprintf(NULL, "/proc/%d/uid_map", pid) );
167 FILE *f = fopen(fpath, "r");
168 if (!f)
169 return false;
170
341b21ad 171 /* if caller's not root in his namespace, reject */
053a659d
SH
172 nsuid = convert_id_to_ns(f, uid);
173 if (nsuid)
174 goto out;
175
341b21ad
SH
176 /*
177 * If victim is not mapped into caller's ns, reject.
178 * XXX I'm not sure this check is needed given that fuse
179 * will be sending requests where the vfs has converted
180 */
053a659d
SH
181 nsuid = convert_id_to_ns(f, victim);
182 if (nsuid == -1)
183 goto out;
184
185 answer = true;
186
187out:
188 fclose(f);
189 return answer;
758ad80c
SH
190}
191
/*
 * Check whether the "other" permission bits of fmode allow the access
 * mode (O_RDONLY, O_WRONLY or O_RDWR) requested in req_mode.  Callers
 * shift owner/group bits down into the "other" position first.
 * Any unrecognised access mode is refused.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t how = req_mode & O_ACCMODE;
	mode_t wanted;

	if (how == O_RDONLY)
		wanted = S_IROTH;
	else if (how == O_WRONLY)
		wanted = S_IWOTH;
	else if (how == O_RDWR)
		wanted = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & wanted) == wanted;
}
211
3db25a35
SH
/*
 * Return the first path component of taskcg that lies below querycg,
 * as a fresh nih-allocated string.  taskcg must be strictly longer than
 * querycg; otherwise a message is printed and NULL is returned.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t qlen = strlen(querycg);
	size_t skip;
	char *next, *slash;

	if (strlen(taskcg) <= qlen) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* skip querycg plus the separating '/', or just the leading '/' */
	skip = strcmp(querycg, "/") == 0 ? 1 : qlen + 1;
	next = NIH_MUST( nih_strdup(NULL, taskcg + skip) );

	slash = strchr(next, '/');
	if (slash)
		*slash = '\0';
	return next;
}
230
758ad80c
SH
231/*
232 * check whether a fuse context may access a cgroup dir or file
233 *
234 * If file is not null, it is a cgroup file to check under cg.
235 * If file is null, then we are checking perms on cg itself.
236 *
237 * For files we can check the mode of the list_keys result.
238 * For cgroups, we must make assumptions based on the files under the
239 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
240 * yet.
241 */
242static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
243{
244 nih_local struct cgm_keys **list = NULL;
245 int i;
246
247 if (!file)
248 file = "tasks";
249
250 if (*file == '/')
251 file++;
252
253 if (!cgm_list_keys(contrl, cg, &list))
254 return false;
255 for (i = 0; list[i]; i++) {
256 if (strcmp(list[i]->name, file) == 0) {
257 struct cgm_keys *k = list[i];
341b21ad 258 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
758ad80c
SH
259 if (perms_include(k->mode >> 6, mode))
260 return true;
261 }
262 if (fc->gid == k->gid) {
263 if (perms_include(k->mode >> 3, mode))
264 return true;
265 }
266 return perms_include(k->mode, mode);
267 }
268 }
269
270 return false;
271}
272
3db25a35
SH
/* Remove a single trailing '\n' from x, in place, if there is one. */
static void stripnewline(char *x)
{
	size_t len = strlen(x);

	if (len == 0)
		return;
	if (x[len - 1] == '\n')
		x[len - 1] = '\0';
}
279
280/*
281 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
282 * If caller is in /a, he may act on /a/b, but not on /b.
283 * if the answer is false and nextcg is not NULL, then *nextcg will point
284 * to a nih_alloc'd string containing the next cgroup directory under cg
285 */
286static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
287{
288 nih_local char *fnam = NULL;
289 FILE *f;
290 bool answer = false;
291 char *line = NULL;
292 size_t len = 0;
293
294 fnam = NIH_MUST( nih_sprintf(NULL, "/proc/%d/cgroup", pid) );
295 if (!(f = fopen(fnam, "r")))
296 return false;
297
298 while (getline(&line, &len, f) != -1) {
299 char *c1, *c2, *linecmp;
300 if (!line[0])
301 continue;
302 c1 = strchr(line, ':');
303 if (!c1)
304 goto out;
305 c1++;
306 c2 = strchr(c1, ':');
307 if (!c2)
308 goto out;
309 *c2 = '\0';
310 if (strcmp(c1, contrl) != 0)
311 continue;
312 c2++;
313 stripnewline(c2);
314 /*
315 * callers pass in '/' for root cgroup, otherwise they pass
316 * in a cgroup without leading '/'
317 */
318 linecmp = *cg == '/' ? c2 : c2+1;
319 if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
320 if (nextcg)
321 *nextcg = get_next_cgroup_dir(linecmp, cg);
322 goto out;
323 }
324 answer = true;
325 goto out;
326 }
327
328out:
329 fclose(f);
330 free(line);
331 return answer;
332}
333
758ad80c
SH
334/*
335 * given /cgroup/freezer/a/b, return "freezer". this will be nih-allocated
336 * and needs to be nih_freed.
337 */
338static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
339{
340 const char *p1;
341 char *ret, *slash;
342
343 if (strlen(path) < 9)
344 return NULL;
ac5d9d48
SH
345 if (*(path+7) != '/')
346 return NULL;
758ad80c
SH
347 p1 = path+8;
348 ret = nih_strdup(NULL, p1);
349 if (!ret)
350 return ret;
351 slash = strstr(ret, "/");
352 if (slash)
353 *slash = '\0';
354
355 /* verify that it is a subsystem */
356 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
357 int i;
358 if (!list) {
359 nih_free(ret);
360 return NULL;
361 }
362 for (i = 0; list[i]; i++) {
363 if (strcmp(list[i], ret) == 0)
364 return ret;
365 }
366 nih_free(ret);
367 return NULL;
368}
369
/*
 * Find the start of the cgroup in /cgroup/controller/the/cgroup/path.
 * The result points into path and may include files (keynames) etc.
 * Returns NULL when path is shorter than 9 characters or nothing
 * follows the controller name.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *slash;

	if (strlen(path) < 9)
		return NULL;

	slash = strchr(path + 8, '/');
	return slash ? slash + 1 : NULL;
}
385
386static bool is_child_cgroup(const char *contr, const char *dir, const char *f)
387{
388 nih_local char **list = NULL;
389 int i;
390
391 if (!f)
392 return false;
393 if (*f == '/')
394 f++;
395
396 if (!cgm_list_children(contr, dir, &list))
397 return false;
398 for (i = 0; list[i]; i++) {
399 if (strcmp(list[i], f) == 0)
400 return true;
401 }
402
403 return false;
404}
405
406static struct cgm_keys *get_cgroup_key(const char *contr, const char *dir, const char *f)
407{
408 nih_local struct cgm_keys **list = NULL;
409 struct cgm_keys *k;
410 int i;
411
412 if (!f)
413 return NULL;
414 if (*f == '/')
415 f++;
416 if (!cgm_list_keys(contr, dir, &list))
417 return NULL;
418 for (i = 0; list[i]; i++) {
419 if (strcmp(list[i]->name, f) == 0) {
420 k = NIH_MUST( nih_alloc(NULL, (sizeof(*k))) );
421 k->name = NIH_MUST( nih_strdup(k, list[i]->name) );
422 k->uid = list[i]->uid;
423 k->gid = list[i]->gid;
424 k->mode = list[i]->mode;
425 return k;
426 }
427 }
428
429 return NULL;
430}
431
/*
 * Split cg at its last '/'.
 *
 * *dir receives a nih-allocated copy of cg, cut off at the last '/'.
 * *file is set to point at that last '/' inside cg itself (not a copy).
 * If cg has no '/', *dir is the whole string and *file is NULL.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **file)
{
	char *cut;

	*dir = NIH_MUST( nih_strdup(NULL, cg) );
	*file = strrchr(cg, '/');
	if (*file == NULL)
		return;

	/* the copy has the same contents, so this '/' is present too */
	cut = strrchr(*dir, '/');
	*cut = '\0';
}
445
446/*
2ad6d2bd 447 * FUSE ops for /cgroup
758ad80c 448 */
2ad6d2bd 449
758ad80c
SH
450static int cg_getattr(const char *path, struct stat *sb)
451{
452 struct timespec now;
453 struct fuse_context *fc = fuse_get_context();
454 nih_local char * cgdir = NULL;
455 char *fpath = NULL, *path1, *path2;
456 nih_local struct cgm_keys *k = NULL;
457 const char *cgroup;
458 nih_local char *controller = NULL;
459
460
461 if (!fc)
462 return -EIO;
463
464 memset(sb, 0, sizeof(struct stat));
465
466 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
467 return -EINVAL;
468
469 sb->st_uid = sb->st_gid = 0;
470 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
471 sb->st_size = 0;
472
473 if (strcmp(path, "/cgroup") == 0) {
474 sb->st_mode = S_IFDIR | 00755;
475 sb->st_nlink = 2;
476 return 0;
477 }
478
479 controller = pick_controller_from_path(fc, path);
480 if (!controller)
481 return -EIO;
758ad80c
SH
482 cgroup = find_cgroup_in_path(path);
483 if (!cgroup) {
484 /* this is just /cgroup/controller, return it as a dir */
485 sb->st_mode = S_IFDIR | 00755;
486 sb->st_nlink = 2;
487 return 0;
488 }
341b21ad 489
758ad80c
SH
490 get_cgdir_and_path(cgroup, &cgdir, &fpath);
491
492 if (!fpath) {
493 path1 = "/";
494 path2 = cgdir;
495 } else {
496 path1 = cgdir;
497 path2 = fpath;
498 }
499
758ad80c
SH
500 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
501 * Then check that caller's cgroup is under path if fpath is a child
502 * cgroup, or cgdir if fpath is a file */
503
504 if (is_child_cgroup(controller, path1, path2)) {
f9a05025
SH
505 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
506 /* this is just /cgroup/controller, return it as a dir */
507 sb->st_mode = S_IFDIR | 00555;
508 sb->st_nlink = 2;
509 return 0;
510 }
758ad80c 511 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
f9a05025 512 return -EACCES;
758ad80c 513
053a659d
SH
514 // get uid, gid, from '/tasks' file and make up a mode
515 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
516 sb->st_mode = S_IFDIR | 00755;
517 k = get_cgroup_key(controller, cgroup, "tasks");
518 if (!k) {
053a659d
SH
519 sb->st_uid = sb->st_gid = 0;
520 } else {
053a659d
SH
521 sb->st_uid = k->uid;
522 sb->st_gid = k->gid;
523 }
758ad80c
SH
524 sb->st_nlink = 2;
525 return 0;
526 }
527
528 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
3db25a35
SH
529 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL))
530 return -ENOENT;
758ad80c 531 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY))
f9a05025 532 return -EACCES;
758ad80c 533
758ad80c 534 sb->st_mode = S_IFREG | k->mode;
053a659d 535 sb->st_nlink = 1;
758ad80c
SH
536 sb->st_uid = k->uid;
537 sb->st_gid = k->gid;
7253e0a4 538 sb->st_size = 0;
758ad80c
SH
539 return 0;
540 }
541
ab54b798 542 return -ENOENT;
758ad80c 543}
2183082c 544
7f163b71
SH
545/*
546 * TODO - cache these results in a table for use in opendir, free
547 * in releasedir
548 */
758ad80c 549static int cg_opendir(const char *path, struct fuse_file_info *fi)
2183082c 550{
7f163b71
SH
551 struct fuse_context *fc = fuse_get_context();
552 nih_local struct cgm_keys **list = NULL;
553 const char *cgroup;
c688e1b3 554 struct file_info *dir_info;
7f163b71 555 nih_local char *controller = NULL;
7f163b71
SH
556
557 if (!fc)
558 return -EIO;
559
c688e1b3
SH
560 if (strcmp(path, "/cgroup") == 0) {
561 cgroup = NULL;
562 controller = NULL;
563 } else {
564 // return list of keys for the controller, and list of child cgroups
565 controller = pick_controller_from_path(fc, path);
566 if (!controller)
567 return -EIO;
7f163b71 568
c688e1b3
SH
569 cgroup = find_cgroup_in_path(path);
570 if (!cgroup) {
571 /* this is just /cgroup/controller, return its contents */
572 cgroup = "/";
573 }
7f163b71
SH
574 }
575
3a6e1a76 576 if (cgroup && !fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
7f163b71 577 return -EACCES;
c688e1b3
SH
578
579 /* we'll free this at cg_releasedir */
580 dir_info = NIH_MUST( nih_alloc(NULL, sizeof(*dir_info)) );
bae07053
SH
581 dir_info->controller = must_copy_string(dir_info, controller);
582 dir_info->cgroup = must_copy_string(dir_info, cgroup);
443d13f5 583 dir_info->type = LXC_TYPE_CGDIR;
c688e1b3 584 dir_info->buf = NULL;
8f6e8f5e 585 dir_info->file = NULL;
c688e1b3
SH
586 dir_info->buflen = 0;
587
588 fi->fh = (unsigned long)dir_info;
758ad80c
SH
589 return 0;
590}
591
758ad80c
SH
592static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
593 struct fuse_file_info *fi)
594{
c688e1b3
SH
595 struct file_info *d = (struct file_info *)fi->fh;
596 nih_local struct cgm_keys **list = NULL;
597 int i;
598 nih_local char *nextcg = NULL;
758ad80c
SH
599 struct fuse_context *fc = fuse_get_context();
600
443d13f5 601 if (d->type != LXC_TYPE_CGDIR) {
b845ad01
SH
602 fprintf(stderr, "Internal error: file cache info used in readdir\n");
603 return -EIO;
604 }
c688e1b3
SH
605 if (!d->cgroup && !d->controller) {
606 // ls /var/lib/lxcfs/cgroup - just show list of controllers
758ad80c
SH
607 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
608 int i;
609
610 if (!list)
611 return -EIO;
7f163b71 612
758ad80c
SH
613 for (i = 0; list[i]; i++) {
614 if (filler(buf, list[i], NULL, 0) != 0) {
615 return -EIO;
616 }
617 }
618 return 0;
619 }
620
c688e1b3 621 if (!cgm_list_keys(d->controller, d->cgroup, &list))
3db25a35 622 // not a valid cgroup
758ad80c 623 return -EINVAL;
3db25a35 624
c688e1b3 625 if (!caller_is_in_ancestor(fc->pid, d->controller, d->cgroup, &nextcg)) {
3db25a35
SH
626 if (nextcg) {
627 int ret;
628 ret = filler(buf, nextcg, NULL, 0);
629 if (ret != 0)
630 return -EIO;
631 }
632 return 0;
633 }
634
758ad80c 635 for (i = 0; list[i]; i++) {
758ad80c
SH
636 if (filler(buf, list[i]->name, NULL, 0) != 0) {
637 return -EIO;
638 }
639 }
640
641 // now get the list of child cgroups
422aa4a5 642 nih_local char **clist = NULL;
758ad80c 643
c688e1b3 644 if (!cgm_list_children(d->controller, d->cgroup, &clist))
758ad80c
SH
645 return 0;
646 for (i = 0; clist[i]; i++) {
758ad80c
SH
647 if (filler(buf, clist[i], NULL, 0) != 0) {
648 return -EIO;
649 }
650 }
651 return 0;
652}
653
8f6e8f5e
SH
/*
 * Free a file_info.  Every field that was nih_alloc()d with the
 * file_info as its parent is released along with it.
 */
static void do_release_file_info(struct file_info *f)
{
	nih_free(f);
}
662
758ad80c
SH
663static int cg_releasedir(const char *path, struct fuse_file_info *fi)
664{
c688e1b3
SH
665 struct file_info *d = (struct file_info *)fi->fh;
666
8f6e8f5e 667 do_release_file_info(d);
758ad80c
SH
668 return 0;
669}
670
99978832
SH
671static int cg_open(const char *path, struct fuse_file_info *fi)
672{
673 nih_local char *controller = NULL;
674 const char *cgroup;
675 char *fpath = NULL, *path1, *path2;
676 nih_local char * cgdir = NULL;
677 nih_local struct cgm_keys *k = NULL;
8f6e8f5e 678 struct file_info *file_info;
99978832
SH
679 struct fuse_context *fc = fuse_get_context();
680
681 if (!fc)
682 return -EIO;
683
684 controller = pick_controller_from_path(fc, path);
685 if (!controller)
686 return -EIO;
687 cgroup = find_cgroup_in_path(path);
688 if (!cgroup)
689 return -EINVAL;
690
691 get_cgdir_and_path(cgroup, &cgdir, &fpath);
692 if (!fpath) {
693 path1 = "/";
694 path2 = cgdir;
695 } else {
696 path1 = cgdir;
697 path2 = fpath;
698 }
699
8f6e8f5e
SH
700 k = get_cgroup_key(controller, path1, path2);
701 if (!k)
702 return -EINVAL;
99978832 703
8f6e8f5e
SH
704 if (!fc_may_access(fc, controller, path1, path2, fi->flags))
705 // should never get here
706 return -EACCES;
99978832 707
8f6e8f5e
SH
708 /* we'll free this at cg_release */
709 file_info = NIH_MUST( nih_alloc(NULL, sizeof(*file_info)) );
bae07053
SH
710 file_info->controller = must_copy_string(file_info, controller);
711 file_info->cgroup = must_copy_string(file_info, path1);
712 file_info->file = must_copy_string(file_info, path2);
443d13f5 713 file_info->type = LXC_TYPE_CGFILE;
8f6e8f5e
SH
714 file_info->buf = NULL;
715 file_info->buflen = 0;
716
717 fi->fh = (unsigned long)file_info;
718 return 0;
719}
720
721static int cg_release(const char *path, struct fuse_file_info *fi)
722{
723 struct file_info *f = (struct file_info *)fi->fh;
724
725 do_release_file_info(f);
726 return 0;
99978832
SH
727}
728
a05660a6
SH
/*
 * Wait up to two seconds for sockfd to become readable, then do a
 * non-blocking recv() of up to len bytes into buf.
 * Returns recv()'s result, or -1 if select() timed out or failed.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	struct timeval timeout = { .tv_sec = 2, .tv_usec = 0 };
	fd_set readable;
	int ready;

	FD_ZERO(&readable);
	FD_SET(sockfd, &readable);

	ready = select(sockfd + 1, &readable, NULL, NULL, &timeout);
	if (ready <= 0)
		return -1;
	return recv(sockfd, buf, len, MSG_DONTWAIT);
}
743
01e71852
SH
/* Results of send_creds() */
#define SEND_CREDS_OK 0		/* message sent */
#define SEND_CREDS_NOTSK 1	/* sendmsg failed with ESRCH */
#define SEND_CREDS_FAIL 2	/* any other failure */

/*
 * Send the one-byte payload v over sock, with *cred attached as an
 * SCM_CREDENTIALS control message.
 *
 * If pingfirst is true, first wait (via msgrecv) for a one-byte message
 * from the peer before sending.
 *
 * Returns SEND_CREDS_OK on success, SEND_CREDS_NOTSK if sendmsg() fails
 * with ESRCH, and SEND_CREDS_FAIL for a missing ping or any other
 * sendmsg() error.  On a sendmsg() failure errno is left holding
 * sendmsg()'s error.
 */
static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
{
	struct msghdr msg = { 0 };
	struct iovec iov;
	struct cmsghdr *cmsg;
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1];

	if (pingfirst) {
		if (msgrecv(sock, buf, 1) != 1) {
			fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
				  __func__);
			return SEND_CREDS_FAIL;
		}
	}

	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_CREDENTIALS;
	memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));

	msg.msg_name = NULL;
	msg.msg_namelen = 0;

	buf[0] = v;
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (sendmsg(sock, &msg, 0) < 0) {
		/* fprintf/strerror may overwrite errno, so capture it first */
		int saved_errno = errno;

		fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
			 strerror(saved_errno));
		errno = saved_errno;
		if (saved_errno == ESRCH)
			return SEND_CREDS_NOTSK;
		return SEND_CREDS_FAIL;
	}

	return SEND_CREDS_OK;
}
792
/*
 * Receive a one-byte message with attached SCM_CREDENTIALS from sock.
 *
 * SO_PASSCRED is enabled on sock and a single '1' byte is written to
 * the peer first.  The function then waits up to two seconds for the
 * reply.
 *
 * On success returns true with *v set to the received byte.  *cred is
 * filled from the control message if one of the expected kind and size
 * is present, and otherwise keeps pid/uid/gid of -1.
 * On any failure returns false; *v is then '1' and *cred is all -1.
 */
static bool recv_creds(int sock, struct ucred *cred, char *v)
{
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char byte[1] = { '1' };
	struct iovec iov = { .iov_base = byte, .iov_len = sizeof(byte) };
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;
	struct timeval timeout = { .tv_sec = 2, .tv_usec = 0 };
	fd_set readable;
	int enable = 1;

	*v = '1';
	cred->pid = -1;
	cred->uid = -1;
	cred->gid = -1;

	if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable)) == -1) {
		fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
		return false;
	}

	/* tell the peer we are ready to receive */
	if (write(sock, byte, 1) != 1) {
		fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
		return false;
	}

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	FD_ZERO(&readable);
	FD_SET(sock, &readable);
	if (select(sock + 1, &readable, NULL, NULL, &timeout) <= 0) {
		fprintf(stderr, "Failed to select for scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	if (recvmsg(sock, &msg, MSG_DONTWAIT) < 0) {
		fprintf(stderr, "Failed to receive scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg != NULL &&
	    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
	    cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_CREDENTIALS)
		memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));

	*v = byte[0];
	return true;
}
858
859
/*
 * pid_to_ns - reads pids from a ucred over a socket, then writes the
 * int value back over the socket.  This shifts the pid from the
 * sender's pidns into tpid's pidns.
 *
 * Never returns: exits 0 when told to stop (payload byte '1') or when
 * recv_creds() fails, and exits 1 if a pid cannot be written back.
 */
static void pid_to_ns(int sock, pid_t tpid)
{
	struct ucred cred;
	char v = '0';

	for (;;) {
		if (!recv_creds(sock, &cred, &v))
			break;
		if (v == '1')
			_exit(0);
		if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
			_exit(1);
	}
	_exit(0);
}
878
/*
 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
 * in your old pidns.  Only children which you fork will be in the target
 * pidns.  So the pid_to_ns_wrapper does the setns, then forks a child to
 * actually convert pids.
 *
 * The child acknowledges over a pipe that it has started.  If no ack
 * arrives within one second the child is killed and a new one is forked.
 *
 * Never returns: exits 0 if the child exited cleanly, 1 on any setup
 * failure or if the child failed.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	struct timeval tv;
	fd_set s;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

loop:
	cpid = fork();
	if (cpid < 0)
		_exit(1);

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		/* does not return */
		pid_to_ns(sock, tpid);
	}
	// give the child 1 second to be done forking and
	// write its ack
	FD_ZERO(&s);
	FD_SET(cpipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
	if (ret <= 0)
		goto again;
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1') {
		goto again;
	}

	/* wait_for_pid() returns 0 when the child exited cleanly */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);

again:
	kill(cpid, SIGKILL);
	wait_for_pid(cpid);
	goto loop;
}
945
946/*
947 * To read cgroup files with a particular pid, we will setns into the child
948 * pidns, open a pipe, fork a child - which will be the first to really be in
949 * the child ns - which does the cgm_get_value and writes the data to the pipe.
950 */
951static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
952{
953 int sock[2] = {-1, -1};
954 nih_local char *tmpdata = NULL;
955 int ret;
956 pid_t qpid, cpid = -1;
957 bool answer = false;
958 char v = '0';
959 struct ucred cred;
960 struct timeval tv;
961 fd_set s;
962
963 if (!cgm_get_value(contrl, cg, file, &tmpdata))
964 return false;
965
966 /*
967 * Now we read the pids from returned data one by one, pass
968 * them into a child in the target namespace, read back the
969 * translated pids, and put them into our to-return data
970 */
971
972 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
973 perror("socketpair");
974 exit(1);
975 }
976
977 cpid = fork();
978 if (cpid == -1)
979 goto out;
980
981 if (!cpid) // child
4775fba1 982 pid_to_ns_wrapper(sock[1], tpid);
a05660a6
SH
983
984 char *ptr = tmpdata;
985 cred.uid = 0;
986 cred.gid = 0;
987 while (sscanf(ptr, "%d\n", &qpid) == 1) {
988 cred.pid = qpid;
01e71852
SH
989 ret = send_creds(sock[0], &cred, v, true);
990
991 if (ret == SEND_CREDS_NOTSK)
992 goto next;
993 if (ret == SEND_CREDS_FAIL)
a05660a6
SH
994 goto out;
995
996 // read converted results
997 FD_ZERO(&s);
998 FD_SET(sock[0], &s);
6ee867dc 999 tv.tv_sec = 2;
a05660a6
SH
1000 tv.tv_usec = 0;
1001 ret = select(sock[0]+1, &s, NULL, NULL, &tv);
1002 if (ret <= 0) {
6ee867dc
SH
1003 fprintf(stderr, "%s: select error waiting for pid from child: %s\n",
1004 __func__, strerror(errno));
a05660a6
SH
1005 goto out;
1006 }
1007 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1008 fprintf(stderr, "%s: error reading pid from child: %s\n",
1009 __func__, strerror(errno));
a05660a6
SH
1010 goto out;
1011 }
a05660a6 1012 NIH_MUST( nih_strcat_sprintf(d, NULL, "%d\n", qpid) );
01e71852 1013next:
a05660a6
SH
1014 ptr = strchr(ptr, '\n');
1015 if (!ptr)
1016 break;
1017 ptr++;
1018 }
1019
1020 cred.pid = getpid();
1021 v = '1';
01e71852 1022 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
a05660a6 1023 // failed to ask child to exit
6ee867dc
SH
1024 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
1025 __func__, strerror(errno));
a05660a6
SH
1026 goto out;
1027 }
1028
1029 answer = true;
1030
1031out:
1032 if (cpid != -1)
1033 wait_for_pid(cpid);
1034 if (sock[0] != -1) {
1035 close(sock[0]);
1036 close(sock[1]);
1037 }
1038 return answer;
1039}
1040
99978832
SH
1041static int cg_read(const char *path, char *buf, size_t size, off_t offset,
1042 struct fuse_file_info *fi)
1043{
99978832 1044 struct fuse_context *fc = fuse_get_context();
8f6e8f5e 1045 struct file_info *f = (struct file_info *)fi->fh;
99978832
SH
1046 nih_local struct cgm_keys *k = NULL;
1047
443d13f5 1048 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1049 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
1050 return -EIO;
1051 }
1052
99978832 1053 if (offset)
7253e0a4 1054 return 0;
99978832
SH
1055
1056 if (!fc)
1057 return -EIO;
1058
8f6e8f5e 1059 if (!f->controller)
99978832
SH
1060 return -EINVAL;
1061
8f6e8f5e 1062 if ((k = get_cgroup_key(f->controller, f->cgroup, f->file)) != NULL) {
99978832 1063 nih_local char *data = NULL;
4775fba1
SH
1064 int s;
1065 bool r;
99978832 1066
8f6e8f5e 1067 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY))
f9a05025
SH
1068 // should never get here
1069 return -EACCES;
99978832 1070
8f6e8f5e
SH
1071 if (strcmp(f->file, "tasks") == 0 ||
1072 strcmp(f->file, "/tasks") == 0 ||
1073 strcmp(f->file, "/cgroup.procs") == 0 ||
1074 strcmp(f->file, "cgroup.procs") == 0)
a05660a6 1075 // special case - we have to translate the pids
8f6e8f5e 1076 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
a05660a6 1077 else
8f6e8f5e 1078 r = cgm_get_value(f->controller, f->cgroup, f->file, &data);
a05660a6 1079
4775fba1 1080 if (!r)
99978832
SH
1081 return -EINVAL;
1082
4775fba1
SH
1083 if (!data)
1084 return 0;
99978832
SH
1085 s = strlen(data);
1086 if (s > size)
1087 s = size;
1088 memcpy(buf, data, s);
5ea0727e
SH
1089 if (s > 0 && s < size && data[s-1] != '\n')
1090 buf[s++] = '\n';
99978832 1091
99978832
SH
1092 return s;
1093 }
1094
1095 return -EINVAL;
1096}
1097
4775fba1
SH
1098static void pid_from_ns(int sock, pid_t tpid)
1099{
1100 pid_t vpid;
1101 struct ucred cred;
1102 char v;
6ee867dc
SH
1103 struct timeval tv;
1104 fd_set s;
1105 int ret;
4775fba1
SH
1106
1107 cred.uid = 0;
1108 cred.gid = 0;
6ee867dc
SH
1109 while (1) {
1110 FD_ZERO(&s);
1111 FD_SET(sock, &s);
1112 tv.tv_sec = 2;
1113 tv.tv_usec = 0;
1114 ret = select(sock+1, &s, NULL, NULL, &tv);
ea56f722
SH
1115 if (ret <= 0) {
1116 fprintf(stderr, "%s: bad select before read from parent: %s\n",
6ee867dc 1117 __func__, strerror(errno));
67bd113f 1118 _exit(1);
6ee867dc
SH
1119 }
1120 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1121 fprintf(stderr, "%s: bad read from parent: %s\n",
1122 __func__, strerror(errno));
67bd113f 1123 _exit(1);
6ee867dc 1124 }
4775fba1 1125 if (vpid == -1) // done
01e71852 1126 break;
4775fba1
SH
1127 v = '0';
1128 cred.pid = vpid;
01e71852 1129 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
4775fba1
SH
1130 v = '1';
1131 cred.pid = getpid();
01e71852 1132 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
67bd113f 1133 _exit(1);
4775fba1
SH
1134 }
1135 }
67bd113f 1136 _exit(0);
4775fba1
SH
1137}
1138
/*
 * Enter tpid's pid namespace with setns(), then fork a child (the first
 * process actually inside that namespace) to run pid_from_ns().
 *
 * The child acknowledges over a pipe that it has started.  If no ack
 * arrives within one second the child is killed and a new one is forked.
 *
 * Never returns: exits 0 if the child exited cleanly, 1 on any setup
 * failure or if the child failed.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	fd_set s;
	struct timeval tv;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

loop:
	cpid = fork();

	if (cpid < 0)
		_exit(1);

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		/* does not return */
		pid_from_ns(sock, tpid);
	}

	// give the child 1 second to be done forking and
	// write its ack
	FD_ZERO(&s);
	FD_SET(cpipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
	if (ret <= 0)
		goto again;
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1') {
		goto again;
	}

	/* wait_for_pid() returns 0 when the child exited cleanly */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);

again:
	kill(cpid, SIGKILL);
	wait_for_pid(cpid);
	goto loop;
}
1201
/*
 * Move every pid listed in @buf (one decimal pid per line) into cgroup @cg
 * of controller @contrl.
 *
 * The pids in @buf are taken as written by the task @tpid.  A forked helper
 * (pid_from_ns_wrapper) enters @tpid's pid namespace; each pid is written to
 * it over a socketpair and it answers with credentials.  When the answer is
 * tagged '0', the pid found in those credentials is handed to
 * cgm_move_pid().
 *
 * @file is currently unused.
 *
 * Returns true only if no cgm_move_pid() call failed.  Failure to create
 * the socketpair, to fork, or to write a pid to the helper returns false.
 */
static bool do_write_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, const char *buf)
{
	int sock[2] = {-1, -1};
	pid_t qpid, cpid = -1;
	bool answer = false, fail = false;
	const char *ptr;

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		/*
		 * We are running inside the filesystem daemon here, not in a
		 * forked helper: fail this one write instead of exiting.
		 */
		perror("socketpair");
		return false;
	}

	cpid = fork();
	if (cpid == -1)
		goto out;

	if (!cpid) // child; does not return
		pid_from_ns_wrapper(sock[1], tpid);

	ptr = buf;
	while (sscanf(ptr, "%d", &qpid) == 1) {
		struct ucred cred;
		char v;

		if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
			fprintf(stderr, "%s: error writing pid to child: %s\n",
				__func__, strerror(errno));
			goto out;
		}

		/* only a reply tagged '0' carries a usable pid */
		if (recv_creds(sock[0], &cred, &v)) {
			if (v == '0') {
				if (!cgm_move_pid(contrl, cg, cred.pid))
					fail = true;
			}
		}

		/* advance to the next line, if any */
		ptr = strchr(ptr, '\n');
		if (!ptr)
			break;
		ptr++;
	}

	/* tell the helper we are done */
	qpid = -1;
	if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
		fprintf(stderr, "Warning: failed to ask child to exit\n");

	if (!fail)
		answer = true;

out:
	if (cpid != -1)
		wait_for_pid(cpid);
	if (sock[0] != -1) {
		close(sock[0]);
		close(sock[1]);
	}
	return answer;
}
1265
2ad6d2bd
SH
1266int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1267 struct fuse_file_info *fi)
1268{
2ad6d2bd 1269 struct fuse_context *fc = fuse_get_context();
47cbf0e5 1270 nih_local char *localbuf = NULL;
8f6e8f5e
SH
1271 nih_local struct cgm_keys *k = NULL;
1272 struct file_info *f = (struct file_info *)fi->fh;
2ad6d2bd 1273
443d13f5 1274 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1275 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
1276 return -EIO;
1277 }
1278
2ad6d2bd 1279 if (offset)
7253e0a4 1280 return 0;
2ad6d2bd
SH
1281
1282 if (!fc)
1283 return -EIO;
1284
47cbf0e5
SH
1285 localbuf = NIH_MUST( nih_alloc(NULL, size+1) );
1286 localbuf[size] = '\0';
1287 memcpy(localbuf, buf, size);
2ad6d2bd 1288
8f6e8f5e 1289 if ((k = get_cgroup_key(f->controller, f->cgroup, f->file)) != NULL) {
4775fba1
SH
1290 bool r;
1291
8f6e8f5e 1292 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY))
f9a05025 1293 return -EACCES;
2ad6d2bd 1294
8f6e8f5e
SH
1295 if (strcmp(f->file, "tasks") == 0 ||
1296 strcmp(f->file, "/tasks") == 0 ||
1297 strcmp(f->file, "/cgroup.procs") == 0 ||
1298 strcmp(f->file, "cgroup.procs") == 0)
4775fba1 1299 // special case - we have to translate the pids
8f6e8f5e 1300 r = do_write_pids(fc->pid, f->controller, f->cgroup, f->file, localbuf);
4775fba1 1301 else
8f6e8f5e 1302 r = cgm_set_value(f->controller, f->cgroup, f->file, localbuf);
4775fba1
SH
1303
1304 if (!r)
2ad6d2bd
SH
1305 return -EINVAL;
1306
1307 return size;
1308 }
1309
1310 return -EINVAL;
1311}
1312
341b21ad
SH
1313int cg_chown(const char *path, uid_t uid, gid_t gid)
1314{
1315 struct fuse_context *fc = fuse_get_context();
1316 nih_local char * cgdir = NULL;
1317 char *fpath = NULL, *path1, *path2;
1318 nih_local struct cgm_keys *k = NULL;
1319 const char *cgroup;
1320 nih_local char *controller = NULL;
1321
1322
1323 if (!fc)
1324 return -EIO;
1325
1326 if (strcmp(path, "/cgroup") == 0)
1327 return -EINVAL;
1328
1329 controller = pick_controller_from_path(fc, path);
1330 if (!controller)
f9a05025 1331 return -EINVAL;
341b21ad
SH
1332 cgroup = find_cgroup_in_path(path);
1333 if (!cgroup)
1334 /* this is just /cgroup/controller */
1335 return -EINVAL;
1336
1337 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1338
1339 if (!fpath) {
1340 path1 = "/";
1341 path2 = cgdir;
1342 } else {
1343 path1 = cgdir;
1344 path2 = fpath;
1345 }
1346
1347 if (is_child_cgroup(controller, path1, path2)) {
1348 // get uid, gid, from '/tasks' file and make up a mode
1349 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1350 k = get_cgroup_key(controller, cgroup, "tasks");
1351
1352 } else
1353 k = get_cgroup_key(controller, path1, path2);
1354
1355 if (!k)
1356 return -EINVAL;
1357
1358 /*
1359 * This being a fuse request, the uid and gid must be valid
1360 * in the caller's namespace. So we can just check to make
1361 * sure that the caller is root in his uid, and privileged
1362 * over the file's current owner.
1363 */
1364 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD))
f9a05025 1365 return -EACCES;
341b21ad
SH
1366
1367 if (!cgm_chown_file(controller, cgroup, uid, gid))
1368 return -EINVAL;
1369 return 0;
1370}
2ad6d2bd 1371
fd2e4e03
SH
1372int cg_chmod(const char *path, mode_t mode)
1373{
0a1bb5ea
SH
1374 struct fuse_context *fc = fuse_get_context();
1375 nih_local char * cgdir = NULL;
1376 char *fpath = NULL, *path1, *path2;
1377 nih_local struct cgm_keys *k = NULL;
1378 const char *cgroup;
1379 nih_local char *controller = NULL;
1380
1381 if (!fc)
1382 return -EIO;
1383
1384 if (strcmp(path, "/cgroup") == 0)
1385 return -EINVAL;
1386
1387 controller = pick_controller_from_path(fc, path);
1388 if (!controller)
f9a05025 1389 return -EINVAL;
0a1bb5ea
SH
1390 cgroup = find_cgroup_in_path(path);
1391 if (!cgroup)
1392 /* this is just /cgroup/controller */
1393 return -EINVAL;
1394
1395 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1396
1397 if (!fpath) {
1398 path1 = "/";
1399 path2 = cgdir;
1400 } else {
1401 path1 = cgdir;
1402 path2 = fpath;
1403 }
1404
1405 if (is_child_cgroup(controller, path1, path2)) {
1406 // get uid, gid, from '/tasks' file and make up a mode
1407 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1408 k = get_cgroup_key(controller, cgroup, "tasks");
1409
1410 } else
1411 k = get_cgroup_key(controller, path1, path2);
1412
1413 if (!k)
1414 return -EINVAL;
1415
1416 /*
1417 * This being a fuse request, the uid and gid must be valid
1418 * in the caller's namespace. So we can just check to make
1419 * sure that the caller is root in his uid, and privileged
1420 * over the file's current owner.
1421 */
1422 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT))
1423 return -EPERM;
1424
1425 if (!cgm_chmod_file(controller, cgroup, mode))
1426 return -EINVAL;
1427 return 0;
fd2e4e03
SH
1428}
1429
ab54b798
SH
1430int cg_mkdir(const char *path, mode_t mode)
1431{
1432 struct fuse_context *fc = fuse_get_context();
1433 nih_local struct cgm_keys **list = NULL;
1434 char *fpath = NULL, *path1;
1435 nih_local char * cgdir = NULL;
1436 const char *cgroup;
1437 nih_local char *controller = NULL;
1438
ab54b798
SH
1439 if (!fc)
1440 return -EIO;
1441
1442
1443 controller = pick_controller_from_path(fc, path);
1444 if (!controller)
f9a05025 1445 return -EINVAL;
ab54b798
SH
1446
1447 cgroup = find_cgroup_in_path(path);
1448 if (!cgroup)
f9a05025 1449 return -EINVAL;
ab54b798
SH
1450
1451 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1452 if (!fpath)
1453 path1 = "/";
1454 else
1455 path1 = cgdir;
1456
1457 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR))
f9a05025 1458 return -EACCES;
ab54b798
SH
1459
1460
1461 if (!cgm_create(controller, cgroup, fc->uid, fc->gid))
1462 return -EINVAL;
1463
1464 return 0;
1465}
1466
50d8d5b5
SH
1467static int cg_rmdir(const char *path)
1468{
1469 struct fuse_context *fc = fuse_get_context();
1470 nih_local struct cgm_keys **list = NULL;
1471 char *fpath = NULL;
1472 nih_local char * cgdir = NULL;
1473 const char *cgroup;
1474 nih_local char *controller = NULL;
1475
1476 if (!fc)
1477 return -EIO;
1478
1479
1480 controller = pick_controller_from_path(fc, path);
1481 if (!controller)
f9a05025 1482 return -EINVAL;
50d8d5b5
SH
1483
1484 cgroup = find_cgroup_in_path(path);
1485 if (!cgroup)
f9a05025 1486 return -EINVAL;
50d8d5b5
SH
1487
1488 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1489 if (!fpath)
1490 return -EINVAL;
1491
1492 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY))
f9a05025 1493 return -EACCES;
50d8d5b5
SH
1494
1495 if (!cgm_remove(controller, cgroup))
1496 return -EINVAL;
1497
1498 return 0;
1499}
1500
2dc17609
SH
/* Return true if @line begins with the string @pref (an empty @pref always matches). */
static bool startswith(const char *line, const char *pref)
{
	return strncmp(line, pref, strlen(pref)) == 0;
}
1507
/*
 * Scan the newline-separated text @memstat for the first line starting
 * with "total_cache", parse the number that follows and store it, divided
 * by 1024, in *@v.  *@v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	static const char key[] = "total_cache";
	const size_t keylen = sizeof(key) - 1;
	char *cur = memstat;

	*v = 0;
	while (*cur != '\0') {
		char *nl;

		if (strncmp(cur, key, keylen) == 0) {
			sscanf(cur + keylen, "%lu", v);
			*v /= 1024;
			return;
		}

		/* step to the start of the next line, or give up on the last one */
		nl = strchr(cur, '\n');
		if (nl == NULL)
			return;
		cur = nl + 1;
	}
}
1525
/*
 * Find the line in @str that starts with "<major>:<minor> <iotype>" and
 * store the unsigned number following that prefix in *@v.  *@v is 0 when
 * no line matches.  @str is newline-separated text.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char prefix[32];
	size_t prefix_len;
	char *cur;

	memset(prefix, 0, 32);
	snprintf(prefix, 32, "%u:%u %s", major, minor, iotype);
	prefix_len = strlen(prefix);

	*v = 0;
	for (cur = str; *cur != '\0'; ) {
		char *nl;

		if (strncmp(cur, prefix, prefix_len) == 0) {
			sscanf(cur + prefix_len, "%lu", v);
			return;
		}

		nl = strchr(cur, '\n');
		if (nl == NULL)
			return;
		cur = nl + 1;
	}
}
1548
2dc17609
SH
1549static char *get_pid_cgroup(pid_t pid, const char *contrl)
1550{
1551 nih_local char *fnam = NULL;
1552 FILE *f;
1553 char *answer = NULL;
1554 char *line = NULL;
1555 size_t len = 0;
1556
1557 fnam = NIH_MUST( nih_sprintf(NULL, "/proc/%d/cgroup", pid) );
1558 if (!(f = fopen(fnam, "r")))
1559 return false;
1560
1561 while (getline(&line, &len, f) != -1) {
1562 char *c1, *c2;
1563 if (!line[0])
1564 continue;
1565 c1 = strchr(line, ':');
1566 if (!c1)
1567 goto out;
1568 c1++;
1569 c2 = strchr(c1, ':');
1570 if (!c2)
1571 goto out;
1572 *c2 = '\0';
1573 if (strcmp(c1, contrl) != 0)
1574 continue;
1575 c2++;
1576 stripnewline(c2);
1577 answer = NIH_MUST( nih_strdup(NULL, c2) );
1578 goto out;
1579 }
1580
1581out:
1582 fclose(f);
1583 free(line);
1584 return answer;
1585}
1586
758ad80c 1587/*
2ad6d2bd 1588 * FUSE ops for /proc
758ad80c 1589 */
758ad80c 1590
23ce2127
SH
1591static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1592 struct fuse_file_info *fi)
1593{
2dc17609 1594 struct fuse_context *fc = fuse_get_context();
97f1f27b 1595 struct file_info *d = (struct file_info *)fi->fh;
2dc17609
SH
1596 nih_local char *cg = get_pid_cgroup(fc->pid, "memory");
1597 nih_local char *memlimit_str = NULL, *memusage_str = NULL, *memstat_str = NULL;
1598 unsigned long memlimit = 0, memusage = 0, cached = 0, hosttotal = 0;
1599 char *line = NULL;
e1068397 1600 size_t linelen = 0, total_len = 0, rv = 0;
97f1f27b
YY
1601 char *cache = d->buf;
1602 size_t cache_size = d->buflen;
2dc17609
SH
1603 FILE *f;
1604
97f1f27b
YY
1605 if (offset){
1606 if (offset > d->size)
1607 return -EINVAL;
1608 int left = d->size - offset;
1609 total_len = left > size ? size: left;
1610 memcpy(buf, cache + offset, total_len);
1611 return total_len;
1612 }
2dc17609
SH
1613
1614 if (!cg)
1615 return 0;
1616
1617 if (!cgm_get_value("memory", cg, "memory.limit_in_bytes", &memlimit_str))
1618 return 0;
1619 if (!cgm_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
1620 return 0;
1621 if (!cgm_get_value("memory", cg, "memory.stat", &memstat_str))
1622 return 0;
1623 memlimit = strtoul(memlimit_str, NULL, 10);
1624 memusage = strtoul(memusage_str, NULL, 10);
1625 memlimit /= 1024;
1626 memusage /= 1024;
1627 get_mem_cached(memstat_str, &cached);
1628
1629 f = fopen("/proc/meminfo", "r");
1630 if (!f)
1631 return 0;
1632
1633 while (getline(&line, &linelen, f) != -1) {
1634 size_t l;
1635 char *printme, lbuf[100];
1636
1637 memset(lbuf, 0, 100);
1638 if (startswith(line, "MemTotal:")) {
1639 sscanf(line+14, "%lu", &hosttotal);
1640 if (hosttotal < memlimit)
1641 memlimit = hosttotal;
1642 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
1643 printme = lbuf;
1644 } else if (startswith(line, "MemFree:")) {
1645 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
1646 printme = lbuf;
1647 } else if (startswith(line, "MemAvailable:")) {
1648 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
1649 printme = lbuf;
1650 } else if (startswith(line, "Buffers:")) {
1651 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
1652 printme = lbuf;
1653 } else if (startswith(line, "Cached:")) {
1654 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
1655 printme = lbuf;
1656 } else if (startswith(line, "SwapCached:")) {
1657 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
1658 printme = lbuf;
1659 } else
1660 printme = line;
97f1f27b
YY
1661
1662 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
1663 if (l < 0) {
1664 perror("Error writing to cache");
1665 rv = 0;
1666 goto err;
1667
1668 }
1669 if (l >= cache_size) {
1670 fprintf(stderr, "Internal error: truncated write to cache\n");
1671 rv = 0;
1672 goto err;
1673 }
1674
97f1f27b
YY
1675 cache += l;
1676 cache_size -= l;
2f919d9d 1677 total_len += l;
2dc17609
SH
1678 }
1679
97f1f27b
YY
1680 d->size = total_len;
1681 if (total_len > size ) total_len = size;
1682 memcpy(buf, d->buf, total_len);
1683
e1068397
MM
1684 rv = total_len;
1685 err:
92c84dc4
SH
1686 fclose(f);
1687 free(line);
e1068397 1688 return rv;
23ce2127
SH
1689}
1690
/*
 * Fetch the "cpuset.cpus" value of cgroup @cg from the cpuset controller.
 * Returns the string filled in by cgm_get_value(), or NULL if the lookup
 * fails.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgm_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
1703
fa47bb52 1704bool cpu_in_cpuset(int cpu, const char *cpuset);
23ce2127 1705
aeb56147
SH
/*
 * Given a "processor : N" line, report whether cpu N is part of @cpuset.
 * Lines that do not match that pattern yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu_nr;

	return sscanf(line, "processor : %d", &cpu_nr) == 1 &&
	       cpu_in_cpuset(cpu_nr, cpuset);
}
1714
23ce2127
SH
/*
 * Return true if @line is a "processor : N" line (with a numeric N), as
 * found in /proc/cpuinfo.
 */
static bool is_processor_line(const char *line)
{
	int ignored;

	return sscanf(line, "processor : %d", &ignored) == 1;
}
1726
23ce2127
SH
1727static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
1728 struct fuse_file_info *fi)
1729{
1730 struct fuse_context *fc = fuse_get_context();
97f1f27b 1731 struct file_info *d = (struct file_info *)fi->fh;
23ce2127
SH
1732 nih_local char *cg = get_pid_cgroup(fc->pid, "cpuset");
1733 nih_local char *cpuset = NULL;
1734 char *line = NULL;
e1068397 1735 size_t linelen = 0, total_len = 0, rv = 0;
23ce2127
SH
1736 bool am_printing = false;
1737 int curcpu = -1;
97f1f27b
YY
1738 char *cache = d->buf;
1739 size_t cache_size = d->buflen;
23ce2127
SH
1740 FILE *f;
1741
97f1f27b
YY
1742 if (offset){
1743 if (offset > d->size)
1744 return -EINVAL;
1745 int left = d->size - offset;
1746 total_len = left > size ? size: left;
1747 memcpy(buf, cache + offset, total_len);
2f919d9d 1748 return total_len;
97f1f27b 1749 }
23ce2127
SH
1750
1751 if (!cg)
1752 return 0;
1753
1754 cpuset = get_cpuset(cg);
1755 if (!cpuset)
1756 return 0;
1757
1758 f = fopen("/proc/cpuinfo", "r");
1759 if (!f)
1760 return 0;
1761
1762 while (getline(&line, &linelen, f) != -1) {
1763 size_t l;
1764 if (is_processor_line(line)) {
aeb56147 1765 am_printing = cpuline_in_cpuset(line, cpuset);
23ce2127
SH
1766 if (am_printing) {
1767 curcpu ++;
97f1f27b 1768 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
e1068397
MM
1769 if (l < 0) {
1770 perror("Error writing to cache");
1771 rv = 0;
1772 goto err;
1773 }
1774 if (l >= cache_size) {
1775 fprintf(stderr, "Internal error: truncated write to cache\n");
1776 rv = 0;
1777 goto err;
1778 }
97f1f27b
YY
1779 if (l < cache_size){
1780 cache += l;
1781 cache_size -= l;
1782 total_len += l;
1783 }else{
1784 cache += cache_size;
1785 total_len += cache_size;
1786 cache_size = 0;
1787 break;
1788 }
23ce2127
SH
1789 }
1790 continue;
1791 }
1792 if (am_printing) {
97f1f27b 1793 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
1794 if (l < 0) {
1795 perror("Error writing to cache");
1796 rv = 0;
1797 goto err;
1798 }
1799 if (l >= cache_size) {
1800 fprintf(stderr, "Internal error: truncated write to cache\n");
1801 rv = 0;
1802 goto err;
1803 }
97f1f27b
YY
1804 if (l < cache_size) {
1805 cache += l;
1806 cache_size -= l;
1807 total_len += l;
1808 } else {
1809 cache += cache_size;
1810 total_len += cache_size;
1811 cache_size = 0;
1812 break;
1813 }
23ce2127
SH
1814 }
1815 }
1816
97f1f27b
YY
1817 d->size = total_len;
1818 if (total_len > size ) total_len = size;
1819
1820 /* read from off 0 */
1821 memcpy(buf, d->buf, total_len);
e1068397
MM
1822 rv = total_len;
1823 err:
92c84dc4
SH
1824 fclose(f);
1825 free(line);
e1068397 1826 return rv;
23ce2127
SH
1827}
1828
1829static int proc_stat_read(char *buf, size_t size, off_t offset,
1830 struct fuse_file_info *fi)
1831{
aeb56147 1832 struct fuse_context *fc = fuse_get_context();
97f1f27b 1833 struct file_info *d = (struct file_info *)fi->fh;
aeb56147
SH
1834 nih_local char *cg = get_pid_cgroup(fc->pid, "cpuset");
1835 nih_local char *cpuset = NULL;
1836 char *line = NULL;
e1068397 1837 size_t linelen = 0, total_len = 0, rv = 0;
2a0fde62 1838 int curcpu = -1; /* cpu numbering starts at 0 */
97f1f27b
YY
1839 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
1840 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
1841 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
1842#define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
1843 char cpuall[CPUALL_MAX_SIZE];
1844 /* reserve for cpu all */
1845 char *cache = d->buf + CPUALL_MAX_SIZE;
1846 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
aeb56147
SH
1847 FILE *f;
1848
97f1f27b
YY
1849 if (offset){
1850 if (offset > d->size)
1851 return -EINVAL;
1852 int left = d->size - offset;
1853 total_len = left > size ? size: left;
1854 memcpy(buf, d->buf + offset, total_len);
2f919d9d 1855 return total_len;
97f1f27b 1856 }
aeb56147
SH
1857
1858 if (!cg)
1859 return 0;
1860
1861 cpuset = get_cpuset(cg);
1862 if (!cpuset)
1863 return 0;
1864
1865 f = fopen("/proc/stat", "r");
1866 if (!f)
1867 return 0;
1868
97f1f27b
YY
1869 //skip first line
1870 if (getline(&line, &linelen, f) < 0) {
1871 fprintf(stderr, "proc_stat_read read first line failed\n");
1872 goto out;
1873 }
1874
aeb56147
SH
1875 while (getline(&line, &linelen, f) != -1) {
1876 size_t l;
1877 int cpu;
2a0fde62 1878 char cpu_char[10]; /* That's a lot of cores */
aeb56147
SH
1879 char *c;
1880
2a0fde62
CB
1881 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
1882 /* not a ^cpuN line containing a number N, just print it */
97f1f27b 1883 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
1884 if (l < 0) {
1885 perror("Error writing to cache");
1886 rv = 0;
1887 goto err;
1888 }
1889 if (l >= cache_size) {
1890 fprintf(stderr, "Internal error: truncated write to cache\n");
1891 rv = 0;
1892 goto err;
1893 }
1894 if (l < cache_size) {
97f1f27b
YY
1895 cache += l;
1896 cache_size -= l;
1897 total_len += l;
1898 continue;
e1068397 1899 } else {
97f1f27b
YY
1900 //no more space, break it
1901 cache += cache_size;
1902 total_len += cache_size;
1903 cache_size = 0;
1904 break;
1905 }
aeb56147 1906 }
2a0fde62
CB
1907
1908 if (sscanf(cpu_char, "%d", &cpu) != 1)
1909 continue;
aeb56147
SH
1910 if (!cpu_in_cpuset(cpu, cpuset))
1911 continue;
1912 curcpu ++;
1913
1914 c = strchr(line, ' ');
1915 if (!c)
1916 continue;
25c5e8fb 1917 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
e1068397
MM
1918 if (l < 0) {
1919 perror("Error writing to cache");
1920 rv = 0;
1921 goto err;
1922
1923 }
1924 if (l >= cache_size) {
1925 fprintf(stderr, "Internal error: truncated write to cache\n");
1926 rv = 0;
1927 goto err;
1928 }
1929
97f1f27b
YY
1930 cache += l;
1931 cache_size -= l;
aeb56147 1932 total_len += l;
2f919d9d 1933
97f1f27b
YY
1934 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
1935 &softirq, &steal, &guest) != 9)
1936 continue;
1937 user_sum += user;
1938 nice_sum += nice;
1939 system_sum += system;
1940 idle_sum += idle;
1941 iowait_sum += iowait;
1942 irq_sum += irq;
1943 softirq_sum += softirq;
1944 steal_sum += steal;
2f919d9d 1945 guest_sum += guest;
97f1f27b
YY
1946 }
1947
1948 cache = d->buf;
1949
2f919d9d 1950 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
97f1f27b
YY
1951 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
1952 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
1953 memcpy(cache, cpuall, cpuall_len);
2f919d9d 1954 cache += cpuall_len;
97f1f27b
YY
1955 }else{
1956 /* shouldn't happen */
1957 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
1958 cpuall_len = 0;
1959 }
1960
1961 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
1962 total_len += cpuall_len;
1963 d->size = total_len;
1964 if (total_len > size ) total_len = size;
1965
1966 memcpy(buf, d->buf, total_len);
e1068397
MM
1967 out:
1968 rv = total_len;
1969 err:
92c84dc4
SH
1970 fclose(f);
1971 free(line);
e1068397 1972 return rv;
23ce2127
SH
1973}
1974
7bbf2246
SH
1975/*
1976 * How to guess what to present for uptime?
1977 * One thing we could do would be to take the date on the caller's
1978 * memory.usage_in_bytes file, which should equal the time of creation
1979 * of his cgroup. However, a task could be in a sub-cgroup of the
1980 * container. The same problem exists if we try to look at the ages
1981 * of processes in the caller's cgroup.
1982 *
1983 * So we'll fork a task that will enter the caller's pidns, mount a
1984 * fresh procfs, get the age of /proc/1, and pass that back over a pipe.
1985 *
1986 * For the second uptime #, we'll do as Stéphane had done, just copy
1987 * the number from /proc/uptime. Not sure how to best emulate 'idle'
1988 * time. Maybe someone can come up with a good algorithm and submit a
1989 * patch. Maybe something based on cpushare info?
1990 */
41bb9357
SH
1991
/*
 * Return the age of the reaper (pid 1) of @pid's pid namespace, computed
 * as the current time minus the ctime of its /proc/1 directory.
 *
 * This must run in a forked helper process: it unshares the mount
 * namespace, joins @pid's pid namespace and forks again, because only
 * children created after setns() are placed in the joined pid namespace.
 * The forked child acks on a pipe, mounts a fresh procfs and *returns*
 * the age to the caller.  The intermediate process never returns; it
 * waits for the child and calls _exit().  If the child's ack does not
 * arrive within one second, the child is killed and forked again.
 *
 * Returns 0 on any setup failure.
 */
static long int get_pid1_time(pid_t pid)
{
	char fnam[100];
	int fd, cpipe[2], ret;
	struct stat sb;
	pid_t cpid;
	struct timeval tv;
	fd_set s;
	char v;

	if (unshare(CLONE_NEWNS))
		return 0;

	/* keep our later procfs mount from propagating back to the host */
	if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
		perror("rslave mount failed");
		return 0;
	}

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", pid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		return 0;

	fd = open(fnam, O_RDONLY);
	if (fd < 0) {
		perror("get_pid1_time open of ns/pid");
		return 0;
	}
	if (setns(fd, 0)) {
		perror("get_pid1_time setns 1");
		close(fd);
		return 0;
	}
	close(fd);

	/*
	 * We are a forked copy of the daemon: leave with _exit() so that no
	 * atexit handlers or stdio buffers inherited from it are run/flushed.
	 */
	if (pipe(cpipe) < 0)
		_exit(1);

loop:
	cpid = fork();
	if (cpid < 0)
		return 0;

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		/* replace /proc with one that reflects the new pid namespace */
		umount2("/proc", MNT_DETACH);
		if (mount("proc", "/proc", "proc", 0, NULL)) {
			perror("get_pid1_time mount");
			return 0;
		}
		ret = lstat("/proc/1", &sb);
		if (ret) {
			perror("get_pid1_time lstat");
			return 0;
		}
		return time(NULL) - sb.st_ctime;
	}

	// give the child 1 second to be done forking and
	// write its ack
	FD_ZERO(&s);
	FD_SET(cpipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
	if (ret <= 0)
		goto again;
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1') {
		goto again;
	}

	wait_for_pid(cpid);
	_exit(0);

again:
	kill(cpid, SIGKILL);
	wait_for_pid(cpid);
	goto loop;
}
2078
/*
 * Return the age of the reaper of @qpid's pid namespace.
 *
 * A child is forked to run get_pid1_time(); whichever process returns
 * from that call writes the result to a pipe.  The parent waits up to one
 * second for the value.
 *
 * Returns 0 if the pipe or fork cannot be set up, the wait times out, or
 * the value cannot be read.
 */
static long int getreaperage(pid_t qpid)
{
	int mypipe[2], ret;
	pid_t pid;
	struct timeval tv;
	fd_set s;
	long int mtime, answer = 0;

	if (pipe(mypipe)) {
		return 0;
	}

	pid = fork();
	if (pid < 0) {
		/* no helper: release the pipe and do not wait on pid -1 */
		perror("fork");
		close(mypipe[0]);
		close(mypipe[1]);
		return 0;
	}

	if (!pid) { // child
		mtime = get_pid1_time(qpid);
		if (write(mypipe[1], &mtime, sizeof(mtime)) != sizeof(mtime))
			fprintf(stderr, "Warning: bad write from getreaperage\n");
		_exit(0);
	}

	close(mypipe[1]);
	FD_ZERO(&s);
	FD_SET(mypipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(mypipe[0]+1, &s, NULL, NULL, &tv);
	if (ret < 0) {
		perror("select");
		goto out;
	}
	if (!ret) {
		/* errno is meaningless on timeout, so report it separately */
		fprintf(stderr, "timed out\n");
		goto out;
	}
	if (read(mypipe[0], &mtime, sizeof(mtime)) != sizeof(mtime)) {
		perror("read");
		goto out;
	}
	answer = mtime;

out:
	wait_for_pid(pid);
	close(mypipe[0]);
	return answer;
}
2124
/*
 * Return the idle time, in whole seconds, reported as the second field of
 * /proc/uptime, or 0 if the file cannot be opened or parsed.
 *
 * Both fields carry a fractional part ("12345.67 8901.23"), so they are
 * parsed as floating point; an integer conversion would stop at the first
 * '.' and never reach the second field.
 */
static long int getprocidle(void)
{
	FILE *f = fopen("/proc/uptime", "r");
	double age, idle;
	int ret;

	if (!f)
		return 0;
	ret = fscanf(f, "%lf %lf", &age, &idle);
	fclose(f);
	if (ret != 2)
		return 0;
	return (long int)idle;
}
2138
2139/*
2140 * We read /proc/uptime and reuse its second field.
2141 * For the first field, we use the mtime for the reaper for
2142 * the calling pid as returned by getreaperage
2143 */
23ce2127
SH
2144static int proc_uptime_read(char *buf, size_t size, off_t offset,
2145 struct fuse_file_info *fi)
2146{
41bb9357 2147 struct fuse_context *fc = fuse_get_context();
97f1f27b 2148 struct file_info *d = (struct file_info *)fi->fh;
41bb9357
SH
2149 long int reaperage = getreaperage(fc->pid);;
2150 long int idletime = getprocidle();
97f1f27b 2151 size_t total_len = 0;
41bb9357 2152
97f1f27b
YY
2153 if (offset){
2154 if (offset > d->size)
2155 return -EINVAL;
2156 return 0;
2157 }
2158
2159 total_len = snprintf(buf, size, "%ld %ld\n", reaperage, idletime);
e1068397
MM
2160 if (total_len < 0){
2161 perror("Error writing to cache");
2162 return 0;
2163 }
cdcdb29b
MM
2164 if (total_len >= size){
2165 d->size = size;
2166 return size;
2167 }
2168
97f1f27b
YY
2169 d->size = total_len;
2170 return total_len;
23ce2127
SH
2171}
2172
49878439
YY
2173static int proc_diskstats_read(char *buf, size_t size, off_t offset,
2174 struct fuse_file_info *fi)
2175{
2176 char dev_name[72];
2177 struct fuse_context *fc = fuse_get_context();
97f1f27b 2178 struct file_info *d = (struct file_info *)fi->fh;
49878439
YY
2179 nih_local char *cg = get_pid_cgroup(fc->pid, "blkio");
2180 nih_local char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
2181 *io_wait_time_str = NULL, *io_service_time_str = NULL;
2182 unsigned long read = 0, write = 0;
2183 unsigned long read_merged = 0, write_merged = 0;
2184 unsigned long read_sectors = 0, write_sectors = 0;
2185 unsigned long read_ticks = 0, write_ticks = 0;
2186 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
2187 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
2188 char *line = NULL;
e1068397 2189 size_t linelen = 0, total_len = 0, rv = 0;
49878439
YY
2190 unsigned int major = 0, minor = 0;
2191 int i = 0;
2192 FILE *f;
2193
97f1f27b
YY
2194 if (offset){
2195 if (offset > d->size)
2196 return -EINVAL;
2197 return 0;
2198 }
49878439
YY
2199
2200 if (!cg)
2201 return 0;
2202
2203 if (!cgm_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
2204 return 0;
2205 if (!cgm_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
2206 return 0;
2207 if (!cgm_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
2208 return 0;
2209 if (!cgm_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
2210 return 0;
2211 if (!cgm_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
2212 return 0;
2213
2214
2215 f = fopen("/proc/diskstats", "r");
2216 if (!f)
2217 return 0;
2218
2219 while (getline(&line, &linelen, f) != -1) {
2220 size_t l;
2221 char *printme, lbuf[256];
2222
c0adec85 2223 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
49878439
YY
2224 if(i == 3){
2225 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
2226 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
2227 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
2228 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
2229 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
2230 read_sectors = read_sectors/512;
2231 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
2232 write_sectors = write_sectors/512;
2f919d9d 2233
49878439
YY
2234 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
2235 rd_svctm = rd_svctm/1000000;
2236 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
2237 rd_wait = rd_wait/1000000;
2238 read_ticks = rd_svctm + rd_wait;
2239
2240 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
2241 wr_svctm = wr_svctm/1000000;
2242 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
2243 wr_wait = wr_wait/1000000;
2244 write_ticks = wr_svctm + wr_wait;
2245
2246 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
2247 tot_ticks = tot_ticks/1000000;
2248 }else{
2249 continue;
2250 }
2251
2252 memset(lbuf, 0, 256);
2253 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
2f919d9d 2254 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
49878439
YY
2255 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
2256 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
2257 printme = lbuf;
2258 } else
2259 continue;
2260
2261 l = snprintf(buf, size, "%s", printme);
e1068397
MM
2262 if (l < 0) {
2263 perror("Error writing to fuse buf");
2264 rv = 0;
2265 goto err;
2266 }
2267 if (l >= size) {
2268 fprintf(stderr, "Internal error: truncated write to cache\n");
2269 rv = 0;
2270 goto err;
2271 }
49878439
YY
2272 buf += l;
2273 size -= l;
2274 total_len += l;
2275 }
2276
97f1f27b 2277 d->size = total_len;
e1068397
MM
2278 rv = total_len;
2279 err:
49878439
YY
2280 fclose(f);
2281 free(line);
e1068397 2282 return rv;
49878439
YY
2283}
2284
23ce2127
SH
/*
 * Return the number of bytes obtained by reading the file @which line by
 * line until getline() fails, or 0 if the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char *lineptr = NULL;
	size_t cap = 0;
	ssize_t n, total = 0;
	FILE *fp = fopen(which, "r");

	if (!fp)
		return 0;

	for (;;) {
		n = getline(&lineptr, &cap, fp);
		if (n == -1)
			break;
		total += n;
	}

	fclose (fp);
	free(lineptr);

	return total;
}
2301
758ad80c
SH
/*
 * getattr handler for the /proc part of the filesystem.
 *
 * @sb is zeroed, owned by root and stamped with the current time.
 * "/proc" is reported as a 0555 directory; each of the emulated files is
 * reported as a 0444 regular file of size 0.
 *
 * Returns 0 on success, -EINVAL if the clock cannot be read and -ENOENT
 * for any other path.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const emulated[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	struct timespec now;
	size_t i;

	memset(sb, 0, sizeof(struct stat));
	if (clock_gettime(CLOCK_REALTIME, &now) < 0)
		return -EINVAL;

	sb->st_uid = 0;
	sb->st_gid = 0;
	sb->st_atim = now;
	sb->st_mtim = now;
	sb->st_ctim = now;

	if (strcmp(path, "/proc") == 0) {
		sb->st_mode = S_IFDIR | 00555;
		sb->st_nlink = 2;
		return 0;
	}

	for (i = 0; i < sizeof(emulated) / sizeof(emulated[0]); i++) {
		if (strcmp(path, emulated[i]) != 0)
			continue;
		sb->st_size = 0;
		sb->st_mode = S_IFREG | 00444;
		sb->st_nlink = 1;
		return 0;
	}

	return -ENOENT;
}
2329
2330static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2331 struct fuse_file_info *fi)
2332{
2333 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2334 filler(buf, "meminfo", NULL, 0) != 0 ||
2335 filler(buf, "stat", NULL, 0) != 0 ||
49878439
YY
2336 filler(buf, "uptime", NULL, 0) != 0 ||
2337 filler(buf, "diskstats", NULL, 0) != 0)
758ad80c 2338 return -EINVAL;
758ad80c
SH
2339 return 0;
2340}
2341
35629743
SH
2342static int proc_open(const char *path, struct fuse_file_info *fi)
2343{
96fc5ee6
SH
2344 int type = -1;
2345 struct file_info *info;
2346
2347 if (strcmp(path, "/proc/meminfo") == 0)
2348 type = LXC_TYPE_PROC_MEMINFO;
2349 else if (strcmp(path, "/proc/cpuinfo") == 0)
2350 type = LXC_TYPE_PROC_CPUINFO;
2351 else if (strcmp(path, "/proc/uptime") == 0)
2352 type = LXC_TYPE_PROC_UPTIME;
2353 else if (strcmp(path, "/proc/stat") == 0)
2354 type = LXC_TYPE_PROC_STAT;
2355 else if (strcmp(path, "/proc/diskstats") == 0)
2356 type = LXC_TYPE_PROC_DISKSTATS;
2357 if (type == -1)
2358 return -ENOENT;
2359
2360 info = NIH_MUST( nih_alloc(NULL, sizeof(*info)) );
2361 memset(info, 0, sizeof(*info));
2362 info->type = type;
2363
97f1f27b 2364 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
25c5e8fb 2365 info->buf = NIH_MUST( nih_alloc(info, info->buflen) );
97f1f27b
YY
2366 memset(info->buf, 0, info->buflen);
2367 /* set actual size to buffer size */
2f919d9d 2368 info->size = info->buflen;
97f1f27b 2369
96fc5ee6
SH
2370 fi->fh = (unsigned long)info;
2371 return 0;
2372}
2373
2374static int proc_release(const char *path, struct fuse_file_info *fi)
2375{
2376 struct file_info *f = (struct file_info *)fi->fh;
2377
2378 do_release_file_info(f);
2379 return 0;
35629743
SH
2380}
2381
35629743
SH
2382static int proc_read(const char *path, char *buf, size_t size, off_t offset,
2383 struct fuse_file_info *fi)
2384{
96fc5ee6
SH
2385 struct file_info *f = (struct file_info *) fi->fh;
2386
2387 switch (f->type) {
2f919d9d 2388 case LXC_TYPE_PROC_MEMINFO:
23ce2127 2389 return proc_meminfo_read(buf, size, offset, fi);
96fc5ee6 2390 case LXC_TYPE_PROC_CPUINFO:
23ce2127 2391 return proc_cpuinfo_read(buf, size, offset, fi);
96fc5ee6 2392 case LXC_TYPE_PROC_UPTIME:
23ce2127 2393 return proc_uptime_read(buf, size, offset, fi);
96fc5ee6 2394 case LXC_TYPE_PROC_STAT:
23ce2127 2395 return proc_stat_read(buf, size, offset, fi);
96fc5ee6 2396 case LXC_TYPE_PROC_DISKSTATS:
49878439 2397 return proc_diskstats_read(buf, size, offset, fi);
96fc5ee6
SH
2398 default:
2399 return -EINVAL;
2400 }
35629743
SH
2401}
2402
2ad6d2bd
SH
2403/*
2404 * FUSE ops for /
2405 * these just delegate to the /proc and /cgroup ops as
2406 * needed
2407 */
758ad80c
SH
2408
2409static int lxcfs_getattr(const char *path, struct stat *sb)
2410{
2411 if (strcmp(path, "/") == 0) {
2412 sb->st_mode = S_IFDIR | 00755;
2413 sb->st_nlink = 2;
2414 return 0;
2415 }
2416 if (strncmp(path, "/cgroup", 7) == 0) {
2417 return cg_getattr(path, sb);
2418 }
35629743 2419 if (strncmp(path, "/proc", 5) == 0) {
758ad80c
SH
2420 return proc_getattr(path, sb);
2421 }
2422 return -EINVAL;
2423}
2424
/*
 * Top-level opendir handler.
 *
 * "/" and "/proc" need no per-directory state, so opening them simply
 * succeeds. Anything beginning with "/cgroup" is handed to cg_opendir().
 * Every other path yields -ENOENT.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_opendir(path, fi);

	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	return -ENOENT;
}
2437
2438static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2439 struct fuse_file_info *fi)
2440{
2441 if (strcmp(path, "/") == 0) {
2442 if (filler(buf, "proc", NULL, 0) != 0 ||
2443 filler(buf, "cgroup", NULL, 0) != 0)
2444 return -EINVAL;
2445 return 0;
2446 }
35629743 2447 if (strncmp(path, "/cgroup", 7) == 0)
758ad80c 2448 return cg_readdir(path, buf, filler, offset, fi);
35629743
SH
2449 if (strcmp(path, "/proc") == 0)
2450 return proc_readdir(path, buf, filler, offset, fi);
758ad80c
SH
2451 return -EINVAL;
2452}
2453
/*
 * Top-level releasedir handler.
 *
 * Nothing is held for "/" or "/proc", so releasing them just succeeds.
 * Paths beginning with "/cgroup" are handed to cg_releasedir(). Any
 * other path yields -EINVAL.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_releasedir(path, fi);

	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	return -EINVAL;
}
2465
99978832
SH
/*
 * Top-level open handler: routes the request to cg_open() for paths
 * beginning with "/cgroup" and to proc_open() for paths beginning with
 * "/proc". Any other path yields -EINVAL.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	const bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (in_cgroup)
		return cg_open(path, fi);
	if (strncmp(path, "/proc", 5) != 0)
		return -EINVAL;

	return proc_open(path, fi);
}
2475
/*
 * Top-level read handler: routes the request to cg_read() for paths
 * beginning with "/cgroup" and to proc_read() for paths beginning with
 * "/proc". Any other path yields -EINVAL.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	const bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (in_cgroup)
		return cg_read(path, buf, size, offset, fi);
	if (strncmp(path, "/proc", 5) != 0)
		return -EINVAL;

	return proc_read(path, buf, size, offset, fi);
}
2486
2ad6d2bd
SH
/*
 * Top-level write handler. Only paths beginning with "/cgroup" can be
 * written; those are handed to cg_write(). Everything else yields
 * -EINVAL.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
2496
99978832
SH
/*
 * flush handler: there is nothing to flush, so this always reports
 * success without looking at its arguments.
 */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
2501
/*
 * Top-level release handler: routes the request to cg_release() for
 * paths beginning with "/cgroup" and to proc_release() for paths
 * beginning with "/proc". Any other path yields -EINVAL.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	const bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (in_cgroup)
		return cg_release(path, fi);
	if (strncmp(path, "/proc", 5) != 0)
		return -EINVAL;

	return proc_release(path, fi);
}
2511
/*
 * fsync handler: there is nothing to sync, so this always reports
 * success without looking at its arguments.
 */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
2516
ab54b798
SH
/*
 * mkdir handler. Directories can only be created below "/cgroup", where
 * the request is handed to cg_mkdir(); any other path yields -EINVAL.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
2524
341b21ad
SH
/*
 * chown handler. Ownership can only be changed below "/cgroup", where
 * the request is handed to cg_chown(); any other path yields -EINVAL.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
2532
2ad6d2bd
SH
/*
 * truncate handler.
 *
 * cat first does a truncate before doing ops->write. This doesn't
 * really make sense for cgroups, so for any path beginning with
 * "/cgroup" we report success without doing anything. Every other path
 * yields -EINVAL. @newsize is ignored.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	(void)newsize;

	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
2544
50d8d5b5
SH
/*
 * rmdir handler. Directories can only be removed below "/cgroup", where
 * the request is handed to cg_rmdir(); any other path yields -EINVAL.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
2551
fd2e4e03
SH
/*
 * chmod handler. Modes can only be changed below "/cgroup", where the
 * request is handed to cg_chmod(); any other path yields -EINVAL.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
2558
758ad80c
SH
2559const struct fuse_operations lxcfs_ops = {
2560 .getattr = lxcfs_getattr,
2561 .readlink = NULL,
2562 .getdir = NULL,
2563 .mknod = NULL,
ab54b798 2564 .mkdir = lxcfs_mkdir,
758ad80c 2565 .unlink = NULL,
50d8d5b5 2566 .rmdir = lxcfs_rmdir,
758ad80c
SH
2567 .symlink = NULL,
2568 .rename = NULL,
2569 .link = NULL,
fd2e4e03 2570 .chmod = lxcfs_chmod,
341b21ad 2571 .chown = lxcfs_chown,
2ad6d2bd 2572 .truncate = lxcfs_truncate,
758ad80c 2573 .utime = NULL,
99978832
SH
2574
2575 .open = lxcfs_open,
2576 .read = lxcfs_read,
2577 .release = lxcfs_release,
2ad6d2bd 2578 .write = lxcfs_write,
99978832 2579
758ad80c 2580 .statfs = NULL,
99978832
SH
2581 .flush = lxcfs_flush,
2582 .fsync = lxcfs_fsync,
758ad80c
SH
2583
2584 .setxattr = NULL,
2585 .getxattr = NULL,
2586 .listxattr = NULL,
2587 .removexattr = NULL,
2588
2589 .opendir = lxcfs_opendir,
2590 .readdir = lxcfs_readdir,
2591 .releasedir = lxcfs_releasedir,
2592
2593 .fsyncdir = NULL,
2594 .init = NULL,
2595 .destroy = NULL,
2596 .access = NULL,
2597 .create = NULL,
2598 .ftruncate = NULL,
2599 .fgetattr = NULL,
2600};
2601
/*
 * Print the command-line synopsis to stderr, using @me as the program
 * name, and terminate the process with exit status 1. Does not return.
 */
static void usage(const char *me)
{
	fputs("Usage:\n", stderr);
	fputs("\n", stderr);
	fprintf(stderr, "%s mountpoint\n", me);
	fprintf(stderr, "%s -h\n", me);
	exit(1);
}
2610
/*
 * Return true if @w is one of the accepted spellings of the help
 * request ("-h", "--help", "-help" or "help"), false otherwise.
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = {
		"-h",
		"--help",
		"-help",
		"help",
	};
	size_t i;

	for (i = 0; i < sizeof(spellings) / sizeof(spellings[0]); i++) {
		if (strcmp(w, spellings[i]) == 0)
			return true;
	}

	return false;
}
2620
0b0f73db
SH
/*
 * Remove the first occurrence of @which from the NULL-terminated @argv
 * (argv[0] is never considered), shifting the remaining entries and the
 * terminating NULL down by one and decrementing *@argcp.
 *
 * If @which is not present, @argv and *@argcp are left untouched.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	int pos = 1;

	/* locate the first match, if any */
	while (argv[pos] && strcmp(argv[pos], which) != 0)
		pos++;
	if (!argv[pos])
		return;

	/* close the gap; the last copy moves the NULL terminator */
	do {
		argv[pos] = argv[pos + 1];
		pos++;
	} while (argv[pos]);

	(*argcp)--;
}
2635
/*
 * Remove the first "@opt @v" pair from the NULL-terminated @argv
 * (argv[0] is never considered), shifting the remaining entries and the
 * terminating NULL down by two and decrementing *@argcp by two.
 *
 * If @opt is found but is followed by a value other than @v, the
 * offending value is reported on stderr and the process exits with
 * status 1. If @opt is not present, or is the last argument, nothing is
 * changed.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	for (i = 1; argv[i]; i++) {
		/* an option in the last slot has no value to inspect */
		if (!argv[i+1])
			continue;
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i+1], v) != 0) {
			/* report what was actually passed, not what we expected */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i+1]);
			exit(1);
		}
		/*
		 * argv[i+1] is non-NULL on every iteration, so argv[i+2] is
		 * at worst the terminating NULL and the read stays in bounds.
		 */
		for (; argv[i+1]; i++) {
			argv[i] = argv[i+2];
		}
		(*argcp) -= 2;
		return;
	}
}
2656
38a76a91
SH
/*
 * Report whether the libnih we were built against says it is
 * thread-safe. When HAVE_NIH_THREADSAFE is not defined at build time
 * the answer is always false.
 */
bool detect_libnih_threadsafe(void)
{
#ifdef HAVE_NIH_THREADSAFE
	return nih_threadsafe() ? true : false;
#else
	return false;
#endif
}
2665
758ad80c
SH
2666int main(int argc, char *argv[])
2667{
c0adec85 2668 int ret = -1;
e5d26e0b 2669 struct lxcfs_state *d = NULL;
0b0f73db
SH
2670 /*
2671 * what we pass to fuse_main is:
2672 * argv[0] -s -f -o allow_other,directio argv[1] NULL
2673 */
38a76a91
SH
2674 int nargs = 6;
2675 bool threadsafe = detect_libnih_threadsafe();
2676 char *newargv[7]; // one more than if needed if threadsafe
2677
67bd113f
SH
2678 dbus_threads_init_default();
2679
38a76a91
SH
2680 if (threadsafe)
2681 nargs = 5;
758ad80c 2682
0b0f73db
SH
2683 /* accomodate older init scripts */
2684 swallow_arg(&argc, argv, "-s");
2685 swallow_arg(&argc, argv, "-f");
2686 swallow_option(&argc, argv, "-o", "allow_other");
2687
2e9c0b32
SH
2688 if (argc == 2 && strcmp(argv[1], "--version") == 0) {
2689 fprintf(stderr, "%s\n", VERSION);
2690 exit(0);
2691 }
0b0f73db 2692 if (argc != 2 || is_help(argv[1]))
758ad80c
SH
2693 usage(argv[0]);
2694
0b0f73db
SH
2695 d = NIH_MUST( malloc(sizeof(*d)) );
2696
38a76a91
SH
2697 int cnt = 0;
2698 newargv[cnt++] = argv[0];
2699 if (!threadsafe)
2700 newargv[cnt++] = "-s";
2701 newargv[cnt++] = "-f";
2702 newargv[cnt++] = "-o";
2703 newargv[cnt++] = "allow_other,direct_io";
2704 newargv[cnt++] = argv[1];
2705 newargv[cnt++] = NULL;
758ad80c
SH
2706
2707 if (!cgm_escape_cgroup())
2708 fprintf(stderr, "WARNING: failed to escape to root cgroup\n");
2709
2710 if (!cgm_get_controllers(&d->subsystems))
c0adec85 2711 goto out;
758ad80c 2712
38a76a91 2713 ret = fuse_main(nargs, newargv, &lxcfs_ops, d);
758ad80c 2714
c0adec85 2715out:
e5d26e0b 2716 free(d);
758ad80c 2717 return ret;
2183082c 2718}