]> git.proxmox.com Git - mirror_lxcfs.git/blame - lxcfs.c
timeout the reception of a scm_cred
[mirror_lxcfs.git] / lxcfs.c
CommitLineData
758ad80c
SH
1/* lxcfs
2 *
3 * Copyright © 2014 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
f2799430 6 * See COPYING file for details.
758ad80c
SH
7 */
8
9/*
10 * NOTES - make sure to run this as -s to avoid threading.
11 * TODO - can we enforce that here from the code?
12 */
13#define FUSE_USE_VERSION 26
14
2183082c 15#include <stdio.h>
758ad80c
SH
16#include <dirent.h>
17#include <fcntl.h>
18#include <fuse.h>
19#include <unistd.h>
20#include <errno.h>
21#include <stdbool.h>
22#include <time.h>
23#include <string.h>
24#include <stdlib.h>
25#include <libgen.h>
41bb9357
SH
26#include <sched.h>
27#include <linux/sched.h>
a05660a6 28#include <sys/socket.h>
41bb9357
SH
29#include <sys/mount.h>
30#include <wait.h>
758ad80c
SH
31
32#include <nih/alloc.h>
33#include <nih/string.h>
34
35#include "cgmanager.h"
36
/* State reached through the FUSE context's private_data pointer. */
struct lxcfs_state {
	/*
	 * NULL-terminated, nih-allocated array holding the names of the
	 * mounted subsystems. It is detected once, at startup.
	 */
	char **subsystems;
};
/* The lxcfs_state attached to the FUSE context of the current request. */
#define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
45
4775fba1
SH
46/*
47 * TODO - return value should denote whether child exited with failure
48 * so callers can return errors. Esp read/write of tasks and cgroup.procs
49 */
a05660a6
SH
/*
 * Reap child @pid, retrying when waitpid() is interrupted by a signal.
 * Returns 0 only if the child exited normally with status 0, -1 in every
 * other case (waitpid failure, death by signal, non-zero exit status).
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;
	return -1;
}
67
053a659d
SH
/*
 * Given an open FILE * to /proc/pid/{u,g}id_map, and an id
 * valid in the caller's namespace, return the id mapped into
 * pid's namespace.
 *
 * Each usable line of the file holds three unsigned numbers: the base id
 * inside the namespace, the base id in the caller's namespace and the
 * number of ids in the range. Lines that do not parse are skipped.
 *
 * Returns the mapped id, or -1 (i.e. UINT_MAX, as the return type is
 * unsigned) if the file cannot be rewound, a range wraps around, or no
 * range contains in_id.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	unsigned int nsuid,   // base id for a range in the idfile's namespace
		     hostuid, // base id for a range in the caller's namespace
		     count;   // number of ids in this range
	char line[400];
	int ret;

	/* The same FILE * is queried repeatedly, so always start at the top. */
	if (fseek(idfile, 0L, SEEK_SET) < 0)
		return -1;

	while (fgets(line, sizeof(line), idfile)) {
		ret = sscanf(line, "%u %u %u", &nsuid, &hostuid, &count);
		if (ret != 3)
			continue;
		if (hostuid + count < hostuid || nsuid + count < nsuid) {
			/*
			 * ids wrapped around - unexpected as this is a procfile,
			 * so just bail. line still carries its own newline.
			 */
			fprintf(stderr, "id wraparound at entry %u %u %u in %s",
				nsuid, hostuid, count, line);
			return -1;
		}
		if (hostuid <= in_id && hostuid+count > in_id) {
			/*
			 * now since hostuid <= in_id < hostuid+count, and
			 * hostuid+count and nsuid+count do not wrap around,
			 * we know that nsuid+(in_id-hostuid) which must be
			 * less than nsuid+(count) must not wrap around
			 */
			return (in_id - hostuid) + nsuid;
		}
	}

	// no answer found
	return -1;
}
111
341b21ad
SH
112/*
113 * for is_privileged_over,
114 * specify whether we require the calling uid to be root in his
115 * namespace
116 */
117#define NS_ROOT_REQD true
118#define NS_ROOT_OPT false
119
120static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
758ad80c 121{
053a659d
SH
122 nih_local char *fpath = NULL;
123 bool answer = false;
124 uid_t nsuid;
125
341b21ad
SH
126 if (victim == -1 || uid == -1)
127 return false;
128
129 /*
130 * If the request is one not requiring root in the namespace,
131 * then having the same uid suffices. (i.e. uid 1000 has write
132 * access to files owned by uid 1000
133 */
134 if (!req_ns_root && uid == victim)
758ad80c
SH
135 return true;
136
053a659d
SH
137 fpath = NIH_MUST( nih_sprintf(NULL, "/proc/%d/uid_map", pid) );
138 FILE *f = fopen(fpath, "r");
139 if (!f)
140 return false;
141
341b21ad 142 /* if caller's not root in his namespace, reject */
053a659d
SH
143 nsuid = convert_id_to_ns(f, uid);
144 if (nsuid)
145 goto out;
146
341b21ad
SH
147 /*
148 * If victim is not mapped into caller's ns, reject.
149 * XXX I'm not sure this check is needed given that fuse
150 * will be sending requests where the vfs has converted
151 */
053a659d
SH
152 nsuid = convert_id_to_ns(f, victim);
153 if (nsuid == -1)
154 goto out;
155
156 answer = true;
157
158out:
159 fclose(f);
160 return answer;
758ad80c
SH
161}
162
/*
 * Check whether the "other" permission bits of @fmode allow the access
 * mode encoded in the open(2)-style flags @req_mode. Callers shift the
 * owner or group bits down into the "other" position before calling.
 * An access mode that is none of O_RDONLY/O_WRONLY/O_RDWR is refused.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	const mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
182
3db25a35
SH
/*
 * Return a nih-allocated copy of the first path component of @taskcg that
 * follows @querycg. @taskcg must be strictly longer than @querycg;
 * otherwise a message is printed and NULL is returned.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t skip;
	char *component, *sep;

	if (strlen(taskcg) <= strlen(querycg)) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* For the root query only the leading '/' of taskcg is dropped. */
	skip = strcmp(querycg, "/") == 0 ? 1 : strlen(querycg) + 1;

	component = NIH_MUST( nih_strdup(NULL, taskcg + skip) );
	sep = strchr(component, '/');
	if (sep != NULL)
		*sep = '\0';
	return component;
}
201
758ad80c
SH
202/*
203 * check whether a fuse context may access a cgroup dir or file
204 *
205 * If file is not null, it is a cgroup file to check under cg.
206 * If file is null, then we are checking perms on cg itself.
207 *
208 * For files we can check the mode of the list_keys result.
209 * For cgroups, we must make assumptions based on the files under the
210 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
211 * yet.
212 */
213static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
214{
215 nih_local struct cgm_keys **list = NULL;
216 int i;
217
218 if (!file)
219 file = "tasks";
220
221 if (*file == '/')
222 file++;
223
224 if (!cgm_list_keys(contrl, cg, &list))
225 return false;
226 for (i = 0; list[i]; i++) {
227 if (strcmp(list[i]->name, file) == 0) {
228 struct cgm_keys *k = list[i];
341b21ad 229 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
758ad80c
SH
230 if (perms_include(k->mode >> 6, mode))
231 return true;
232 }
233 if (fc->gid == k->gid) {
234 if (perms_include(k->mode >> 3, mode))
235 return true;
236 }
237 return perms_include(k->mode, mode);
238 }
239 }
240
241 return false;
242}
243
3db25a35
SH
/* Remove a single trailing '\n' from @x in place, if there is one. */
static void stripnewline(char *x)
{
	size_t len = strlen(x);
	char *last;

	if (len == 0)
		return;

	last = x + len - 1;
	if (*last == '\n')
		*last = '\0';
}
250
251/*
252 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
253 * If caller is in /a, he may act on /a/b, but not on /b.
254 * if the answer is false and nextcg is not NULL, then *nextcg will point
255 * to a nih_alloc'd string containing the next cgroup directory under cg
256 */
257static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
258{
259 nih_local char *fnam = NULL;
260 FILE *f;
261 bool answer = false;
262 char *line = NULL;
263 size_t len = 0;
264
265 fnam = NIH_MUST( nih_sprintf(NULL, "/proc/%d/cgroup", pid) );
266 if (!(f = fopen(fnam, "r")))
267 return false;
268
269 while (getline(&line, &len, f) != -1) {
270 char *c1, *c2, *linecmp;
271 if (!line[0])
272 continue;
273 c1 = strchr(line, ':');
274 if (!c1)
275 goto out;
276 c1++;
277 c2 = strchr(c1, ':');
278 if (!c2)
279 goto out;
280 *c2 = '\0';
281 if (strcmp(c1, contrl) != 0)
282 continue;
283 c2++;
284 stripnewline(c2);
285 /*
286 * callers pass in '/' for root cgroup, otherwise they pass
287 * in a cgroup without leading '/'
288 */
289 linecmp = *cg == '/' ? c2 : c2+1;
290 if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
291 if (nextcg)
292 *nextcg = get_next_cgroup_dir(linecmp, cg);
293 goto out;
294 }
295 answer = true;
296 goto out;
297 }
298
299out:
300 fclose(f);
301 free(line);
302 return answer;
303}
304
758ad80c
SH
305/*
306 * given /cgroup/freezer/a/b, return "freezer". this will be nih-allocated
307 * and needs to be nih_freed.
308 */
309static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
310{
311 const char *p1;
312 char *ret, *slash;
313
314 if (strlen(path) < 9)
315 return NULL;
316 p1 = path+8;
317 ret = nih_strdup(NULL, p1);
318 if (!ret)
319 return ret;
320 slash = strstr(ret, "/");
321 if (slash)
322 *slash = '\0';
323
324 /* verify that it is a subsystem */
325 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
326 int i;
327 if (!list) {
328 nih_free(ret);
329 return NULL;
330 }
331 for (i = 0; list[i]; i++) {
332 if (strcmp(list[i], ret) == 0)
333 return ret;
334 }
335 nih_free(ret);
336 return NULL;
337}
338
/*
 * Locate the cgroup part of /cgroup/controller/the/cgroup/path.
 *
 * Returns a pointer into @path just past the '/' that follows the
 * controller name, or NULL when the path is shorter than 9 bytes or has
 * no such '/'. The returned tail may also include file (key) names.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *sep;

	if (strlen(path) < 9)
		return NULL;

	sep = strchr(path + 8, '/');
	return sep != NULL ? sep + 1 : NULL;
}
354
355static bool is_child_cgroup(const char *contr, const char *dir, const char *f)
356{
357 nih_local char **list = NULL;
358 int i;
359
360 if (!f)
361 return false;
362 if (*f == '/')
363 f++;
364
365 if (!cgm_list_children(contr, dir, &list))
366 return false;
367 for (i = 0; list[i]; i++) {
368 if (strcmp(list[i], f) == 0)
369 return true;
370 }
371
372 return false;
373}
374
375static struct cgm_keys *get_cgroup_key(const char *contr, const char *dir, const char *f)
376{
377 nih_local struct cgm_keys **list = NULL;
378 struct cgm_keys *k;
379 int i;
380
381 if (!f)
382 return NULL;
383 if (*f == '/')
384 f++;
385 if (!cgm_list_keys(contr, dir, &list))
386 return NULL;
387 for (i = 0; list[i]; i++) {
388 if (strcmp(list[i]->name, f) == 0) {
389 k = NIH_MUST( nih_alloc(NULL, (sizeof(*k))) );
390 k->name = NIH_MUST( nih_strdup(k, list[i]->name) );
391 k->uid = list[i]->uid;
392 k->gid = list[i]->gid;
393 k->mode = list[i]->mode;
394 return k;
395 }
396 }
397
398 return NULL;
399}
400
/*
 * Split @cg at its last '/'.
 *
 * *dir receives a nih-allocated copy of @cg, cut off at the last '/'.
 * *file receives a pointer into @cg itself, at that last '/', so it
 * includes the slash and must not be freed. When @cg has no '/', *dir is
 * a full copy and *file is NULL.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **file)
{
	char *slash = strrchr(cg, '/');

	*dir = NIH_MUST( nih_strdup(NULL, cg) );
	*file = slash;
	if (slash == NULL)
		return;

	/* The copy has the same layout, so cut it at the same offset. */
	(*dir)[slash - cg] = '\0';
}
414
99978832
SH
415static size_t get_file_size(const char *contrl, const char *cg, const char *f)
416{
417 nih_local char *data = NULL;
418 size_t s;
419 if (!cgm_get_value(contrl, cg, f, &data))
420 return -EINVAL;
421 s = strlen(data);
422 return s;
423}
2ad6d2bd 424
758ad80c 425/*
2ad6d2bd 426 * FUSE ops for /cgroup
758ad80c 427 */
2ad6d2bd 428
758ad80c
SH
429static int cg_getattr(const char *path, struct stat *sb)
430{
431 struct timespec now;
432 struct fuse_context *fc = fuse_get_context();
433 nih_local char * cgdir = NULL;
434 char *fpath = NULL, *path1, *path2;
435 nih_local struct cgm_keys *k = NULL;
436 const char *cgroup;
437 nih_local char *controller = NULL;
438
439
440 if (!fc)
441 return -EIO;
442
443 memset(sb, 0, sizeof(struct stat));
444
445 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
446 return -EINVAL;
447
448 sb->st_uid = sb->st_gid = 0;
449 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
450 sb->st_size = 0;
451
452 if (strcmp(path, "/cgroup") == 0) {
453 sb->st_mode = S_IFDIR | 00755;
454 sb->st_nlink = 2;
455 return 0;
456 }
457
458 controller = pick_controller_from_path(fc, path);
459 if (!controller)
460 return -EIO;
758ad80c
SH
461 cgroup = find_cgroup_in_path(path);
462 if (!cgroup) {
463 /* this is just /cgroup/controller, return it as a dir */
464 sb->st_mode = S_IFDIR | 00755;
465 sb->st_nlink = 2;
466 return 0;
467 }
341b21ad 468
758ad80c
SH
469 get_cgdir_and_path(cgroup, &cgdir, &fpath);
470
471 if (!fpath) {
472 path1 = "/";
473 path2 = cgdir;
474 } else {
475 path1 = cgdir;
476 path2 = fpath;
477 }
478
758ad80c
SH
479 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
480 * Then check that caller's cgroup is under path if fpath is a child
481 * cgroup, or cgdir if fpath is a file */
482
483 if (is_child_cgroup(controller, path1, path2)) {
f9a05025
SH
484 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
485 /* this is just /cgroup/controller, return it as a dir */
486 sb->st_mode = S_IFDIR | 00555;
487 sb->st_nlink = 2;
488 return 0;
489 }
758ad80c 490 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
f9a05025 491 return -EACCES;
758ad80c 492
053a659d
SH
493 // get uid, gid, from '/tasks' file and make up a mode
494 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
495 sb->st_mode = S_IFDIR | 00755;
496 k = get_cgroup_key(controller, cgroup, "tasks");
497 if (!k) {
053a659d
SH
498 sb->st_uid = sb->st_gid = 0;
499 } else {
053a659d
SH
500 sb->st_uid = k->uid;
501 sb->st_gid = k->gid;
502 }
758ad80c
SH
503 sb->st_nlink = 2;
504 return 0;
505 }
506
507 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
3db25a35
SH
508 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL))
509 return -ENOENT;
758ad80c 510 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY))
f9a05025 511 return -EACCES;
758ad80c 512
758ad80c 513 sb->st_mode = S_IFREG | k->mode;
053a659d 514 sb->st_nlink = 1;
758ad80c
SH
515 sb->st_uid = k->uid;
516 sb->st_gid = k->gid;
99978832 517 sb->st_size = get_file_size(controller, path1, path2);
758ad80c
SH
518 return 0;
519 }
520
ab54b798 521 return -ENOENT;
758ad80c 522}
2183082c 523
7f163b71
SH
524/*
525 * TODO - cache these results in a table for use in opendir, free
526 * in releasedir
527 */
758ad80c 528static int cg_opendir(const char *path, struct fuse_file_info *fi)
2183082c 529{
7f163b71
SH
530 struct fuse_context *fc = fuse_get_context();
531 nih_local struct cgm_keys **list = NULL;
532 const char *cgroup;
533 nih_local char *controller = NULL;
7f163b71
SH
534 nih_local char *nextcg = NULL;
535
536 if (!fc)
537 return -EIO;
538
539 if (strcmp(path, "/cgroup") == 0)
540 return 0;
541
542 // return list of keys for the controller, and list of child cgroups
543 controller = pick_controller_from_path(fc, path);
544 if (!controller)
545 return -EIO;
546
547 cgroup = find_cgroup_in_path(path);
548 if (!cgroup) {
549 /* this is just /cgroup/controller, return its contents */
550 cgroup = "/";
551 }
552
553 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
554 return -EACCES;
758ad80c
SH
555 return 0;
556}
557
758ad80c
SH
558static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
559 struct fuse_file_info *fi)
560{
561 struct fuse_context *fc = fuse_get_context();
562
563 if (!fc)
564 return -EIO;
565
566 if (strcmp(path, "/cgroup") == 0) {
567 // get list of controllers
568 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
569 int i;
570
571 if (!list)
572 return -EIO;
7f163b71 573
758ad80c
SH
574 for (i = 0; list[i]; i++) {
575 if (filler(buf, list[i], NULL, 0) != 0) {
576 return -EIO;
577 }
578 }
579 return 0;
580 }
581
582 // return list of keys for the controller, and list of child cgroups
583 nih_local struct cgm_keys **list = NULL;
584 const char *cgroup;
585 nih_local char *controller = NULL;
586 int i;
3db25a35 587 nih_local char *nextcg = NULL;
758ad80c
SH
588
589 controller = pick_controller_from_path(fc, path);
590 if (!controller)
591 return -EIO;
592
593 cgroup = find_cgroup_in_path(path);
594 if (!cgroup) {
595 /* this is just /cgroup/controller, return its contents */
596 cgroup = "/";
597 }
598
599 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
f9a05025 600 return -EACCES;
758ad80c
SH
601
602 if (!cgm_list_keys(controller, cgroup, &list))
3db25a35 603 // not a valid cgroup
758ad80c 604 return -EINVAL;
3db25a35
SH
605
606 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, &nextcg)) {
607 if (nextcg) {
608 int ret;
609 ret = filler(buf, nextcg, NULL, 0);
610 if (ret != 0)
611 return -EIO;
612 }
613 return 0;
614 }
615
758ad80c 616 for (i = 0; list[i]; i++) {
758ad80c
SH
617 if (filler(buf, list[i]->name, NULL, 0) != 0) {
618 return -EIO;
619 }
620 }
621
622 // now get the list of child cgroups
422aa4a5 623 nih_local char **clist = NULL;
758ad80c
SH
624
625 if (!cgm_list_children(controller, cgroup, &clist))
626 return 0;
627 for (i = 0; clist[i]; i++) {
758ad80c
SH
628 if (filler(buf, clist[i], NULL, 0) != 0) {
629 return -EIO;
630 }
631 }
632 return 0;
633}
634
/* FUSE releasedir: nothing is kept per open directory, so always succeed. */
static int cg_releasedir(const char *path, struct fuse_file_info *fi)
{
	(void) path;
	(void) fi;

	return 0;
}
639
26faa701
SH
640/*
641 * TODO - cache info here for read/write, release in cg_release.
642 */
99978832
SH
643static int cg_open(const char *path, struct fuse_file_info *fi)
644{
645 nih_local char *controller = NULL;
646 const char *cgroup;
647 char *fpath = NULL, *path1, *path2;
648 nih_local char * cgdir = NULL;
649 nih_local struct cgm_keys *k = NULL;
650 struct fuse_context *fc = fuse_get_context();
651
652 if (!fc)
653 return -EIO;
654
655 controller = pick_controller_from_path(fc, path);
656 if (!controller)
657 return -EIO;
658 cgroup = find_cgroup_in_path(path);
659 if (!cgroup)
660 return -EINVAL;
661
662 get_cgdir_and_path(cgroup, &cgdir, &fpath);
663 if (!fpath) {
664 path1 = "/";
665 path2 = cgdir;
666 } else {
667 path1 = cgdir;
668 path2 = fpath;
669 }
670
671 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
672 if (!fc_may_access(fc, controller, path1, path2, fi->flags))
f9a05025
SH
673 // should never get here
674 return -EACCES;
99978832 675
99978832
SH
676 return 0;
677 }
678
679 return -EINVAL;
680}
681
a05660a6
SH
/*
 * Wait up to two seconds for @sockfd to become readable, then do one
 * non-blocking recv() of at most @len bytes into @buf.
 * Returns recv()'s result, or -1 if select() itself fails.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	struct timeval timeout = { .tv_sec = 2, .tv_usec = 0 };
	fd_set readable;

	FD_ZERO(&readable);
	FD_SET(sockfd, &readable);

	if (select(sockfd + 1, &readable, NULL, NULL, &timeout) < 0)
		return -1;

	return recv(sockfd, buf, len, MSG_DONTWAIT);
}
696
01e71852
SH
697#define SEND_CREDS_OK 0
698#define SEND_CREDS_NOTSK 1
699#define SEND_CREDS_FAIL 2
700static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
a05660a6
SH
701{
702 struct msghdr msg = { 0 };
703 struct iovec iov;
704 struct cmsghdr *cmsg;
705 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
706 char buf[1];
707 buf[0] = 'p';
708
01e71852
SH
709 if (pingfirst) {
710 if (msgrecv(sock, buf, 1) != 1) {
1420baf8 711 fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
01e71852
SH
712 __func__);
713 return SEND_CREDS_FAIL;
714 }
a05660a6
SH
715 }
716
717 msg.msg_control = cmsgbuf;
718 msg.msg_controllen = sizeof(cmsgbuf);
719
720 cmsg = CMSG_FIRSTHDR(&msg);
721 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
722 cmsg->cmsg_level = SOL_SOCKET;
723 cmsg->cmsg_type = SCM_CREDENTIALS;
724 memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));
725
726 msg.msg_name = NULL;
727 msg.msg_namelen = 0;
728
729 buf[0] = v;
730 iov.iov_base = buf;
731 iov.iov_len = sizeof(buf);
732 msg.msg_iov = &iov;
733 msg.msg_iovlen = 1;
734
735 if (sendmsg(sock, &msg, 0) < 0) {
1420baf8 736 fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
a05660a6
SH
737 strerror(errno));
738 if (errno == 3)
01e71852
SH
739 return SEND_CREDS_NOTSK;
740 return SEND_CREDS_FAIL;
a05660a6
SH
741 }
742
01e71852 743 return SEND_CREDS_OK;
a05660a6
SH
744}
745
746static bool recv_creds(int sock, struct ucred *cred, char *v)
747{
748 struct msghdr msg = { 0 };
749 struct iovec iov;
750 struct cmsghdr *cmsg;
751 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
752 char buf[1];
753 int ret;
754 int optval = 1;
6ee867dc
SH
755 struct timeval tv;
756 fd_set rfds;
a05660a6
SH
757
758 *v = '1';
759
760 cred->pid = -1;
761 cred->uid = -1;
762 cred->gid = -1;
763
764 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
1420baf8 765 fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
a05660a6
SH
766 return false;
767 }
768 buf[0] = '1';
769 if (write(sock, buf, 1) != 1) {
1420baf8 770 fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
a05660a6
SH
771 return false;
772 }
773
774 msg.msg_name = NULL;
775 msg.msg_namelen = 0;
776 msg.msg_control = cmsgbuf;
777 msg.msg_controllen = sizeof(cmsgbuf);
778
779 iov.iov_base = buf;
780 iov.iov_len = sizeof(buf);
781 msg.msg_iov = &iov;
782 msg.msg_iovlen = 1;
783
6ee867dc
SH
784 FD_ZERO(&rfds);
785 FD_SET(sock, &rfds);
786 tv.tv_sec = 2;
787 tv.tv_usec = 0;
788 if (select(sock+1, &rfds, NULL, NULL, &tv) < 0) {
789 fprintf(stderr, "Failed to select for scm_cred: %s\n",
790 strerror(errno));
791 return false;
792 }
793 ret = recvmsg(sock, &msg, MSG_DONTWAIT);
a05660a6 794 if (ret < 0) {
1420baf8 795 fprintf(stderr, "Failed to receive scm_cred: %s\n",
a05660a6
SH
796 strerror(errno));
797 return false;
798 }
799
800 cmsg = CMSG_FIRSTHDR(&msg);
801
802 if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
803 cmsg->cmsg_level == SOL_SOCKET &&
804 cmsg->cmsg_type == SCM_CREDENTIALS) {
805 memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));
806 }
807 *v = buf[0];
808
809 return true;
810}
811
812
813/*
4775fba1
SH
814 * pid_to_ns - reads pids from a ucred over a socket, then writes the
815 * int value back over the socket. This shifts the pid from the
816 * sender's pidns into tpid's pidns.
a05660a6 817 */
4775fba1 818static void pid_to_ns(int sock, pid_t tpid)
a05660a6
SH
819{
820 char v = '0';
821 struct ucred cred;
822
823 while (recv_creds(sock, &cred, &v)) {
824 if (v == '1')
825 exit(0);
a05660a6
SH
826 if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
827 exit(1);
828 }
829 exit(0);
830}
831
/*
 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
 * in your old pidns. Only children which you fork will be in the target
 * pidns. So the pid_to_ns_wrapper does the setns, then forks a child to
 * actually convert pids.
 *
 * Never returns. Exits 0 only if the forked child exited successfully,
 * and 1 on any setup failure or if the child failed.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1;
	char fnam[100];
	pid_t cpid;
	int len;

	len = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (len < 0 || (size_t)len >= sizeof(fnam))
		exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		exit(1);
	if (setns(newnsfd, 0) < 0)
		exit(1);
	close(newnsfd);

	cpid = fork();

	if (cpid < 0)
		exit(1);
	if (!cpid)
		pid_to_ns(sock, tpid);
	/* wait_for_pid() returns 0 on success, so non-zero is the failure. */
	if (wait_for_pid(cpid) != 0)
		exit(1);
	exit(0);
}
862
863/*
864 * To read cgroup files with a particular pid, we will setns into the child
865 * pidns, open a pipe, fork a child - which will be the first to really be in
866 * the child ns - which does the cgm_get_value and writes the data to the pipe.
867 */
868static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
869{
870 int sock[2] = {-1, -1};
871 nih_local char *tmpdata = NULL;
872 int ret;
873 pid_t qpid, cpid = -1;
874 bool answer = false;
875 char v = '0';
876 struct ucred cred;
877 struct timeval tv;
878 fd_set s;
879
880 if (!cgm_get_value(contrl, cg, file, &tmpdata))
881 return false;
882
883 /*
884 * Now we read the pids from returned data one by one, pass
885 * them into a child in the target namespace, read back the
886 * translated pids, and put them into our to-return data
887 */
888
889 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
890 perror("socketpair");
891 exit(1);
892 }
893
894 cpid = fork();
895 if (cpid == -1)
896 goto out;
897
898 if (!cpid) // child
4775fba1 899 pid_to_ns_wrapper(sock[1], tpid);
a05660a6
SH
900
901 char *ptr = tmpdata;
902 cred.uid = 0;
903 cred.gid = 0;
904 while (sscanf(ptr, "%d\n", &qpid) == 1) {
905 cred.pid = qpid;
01e71852
SH
906 ret = send_creds(sock[0], &cred, v, true);
907
908 if (ret == SEND_CREDS_NOTSK)
909 goto next;
910 if (ret == SEND_CREDS_FAIL)
a05660a6
SH
911 goto out;
912
913 // read converted results
914 FD_ZERO(&s);
915 FD_SET(sock[0], &s);
6ee867dc 916 tv.tv_sec = 2;
a05660a6
SH
917 tv.tv_usec = 0;
918 ret = select(sock[0]+1, &s, NULL, NULL, &tv);
919 if (ret <= 0) {
6ee867dc
SH
920 fprintf(stderr, "%s: select error waiting for pid from child: %s\n",
921 __func__, strerror(errno));
a05660a6
SH
922 goto out;
923 }
924 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
925 fprintf(stderr, "%s: error reading pid from child: %s\n",
926 __func__, strerror(errno));
a05660a6
SH
927 goto out;
928 }
a05660a6 929 NIH_MUST( nih_strcat_sprintf(d, NULL, "%d\n", qpid) );
01e71852 930next:
a05660a6
SH
931 ptr = strchr(ptr, '\n');
932 if (!ptr)
933 break;
934 ptr++;
935 }
936
937 cred.pid = getpid();
938 v = '1';
01e71852 939 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
a05660a6 940 // failed to ask child to exit
6ee867dc
SH
941 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
942 __func__, strerror(errno));
a05660a6
SH
943 goto out;
944 }
945
946 answer = true;
947
948out:
949 if (cpid != -1)
950 wait_for_pid(cpid);
951 if (sock[0] != -1) {
952 close(sock[0]);
953 close(sock[1]);
954 }
955 return answer;
956}
957
99978832
SH
958static int cg_read(const char *path, char *buf, size_t size, off_t offset,
959 struct fuse_file_info *fi)
960{
961 nih_local char *controller = NULL;
962 const char *cgroup;
963 char *fpath = NULL, *path1, *path2;
964 struct fuse_context *fc = fuse_get_context();
965 nih_local char * cgdir = NULL;
966 nih_local struct cgm_keys *k = NULL;
967
968 if (offset)
969 return -EIO;
970
971 if (!fc)
972 return -EIO;
973
974 controller = pick_controller_from_path(fc, path);
975 if (!controller)
f9a05025 976 return -EINVAL;
99978832
SH
977 cgroup = find_cgroup_in_path(path);
978 if (!cgroup)
979 return -EINVAL;
980
981 get_cgdir_and_path(cgroup, &cgdir, &fpath);
982 if (!fpath) {
983 path1 = "/";
984 path2 = cgdir;
985 } else {
986 path1 = cgdir;
987 path2 = fpath;
988 }
989
990 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
991 nih_local char *data = NULL;
4775fba1
SH
992 int s;
993 bool r;
99978832 994
2ad6d2bd 995 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY))
f9a05025
SH
996 // should never get here
997 return -EACCES;
99978832 998
a05660a6
SH
999 if (strcmp(path2, "tasks") == 0 ||
1000 strcmp(path2, "/tasks") == 0 ||
1001 strcmp(path2, "/cgroup.procs") == 0 ||
1002 strcmp(path2, "cgroup.procs") == 0)
1003 // special case - we have to translate the pids
4775fba1 1004 r = do_read_pids(fc->pid, controller, path1, path2, &data);
a05660a6 1005 else
4775fba1 1006 r = cgm_get_value(controller, path1, path2, &data);
a05660a6 1007
4775fba1 1008 if (!r)
99978832
SH
1009 return -EINVAL;
1010
4775fba1
SH
1011 if (!data)
1012 return 0;
99978832
SH
1013 s = strlen(data);
1014 if (s > size)
1015 s = size;
1016 memcpy(buf, data, s);
1017
99978832
SH
1018 return s;
1019 }
1020
1021 return -EINVAL;
1022}
1023
4775fba1
SH
1024static void pid_from_ns(int sock, pid_t tpid)
1025{
1026 pid_t vpid;
1027 struct ucred cred;
1028 char v;
6ee867dc
SH
1029 struct timeval tv;
1030 fd_set s;
1031 int ret;
4775fba1
SH
1032
1033 cred.uid = 0;
1034 cred.gid = 0;
6ee867dc
SH
1035 while (1) {
1036 FD_ZERO(&s);
1037 FD_SET(sock, &s);
1038 tv.tv_sec = 2;
1039 tv.tv_usec = 0;
1040 ret = select(sock+1, &s, NULL, NULL, &tv);
1041 if (ret < 0) {
1042 fprintf(stderr, "%s: bad jelect before read from parent: %s\n",
1043 __func__, strerror(errno));
1044 exit(1);
1045 }
1046 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1047 fprintf(stderr, "%s: bad read from parent: %s\n",
1048 __func__, strerror(errno));
1049 exit(1);
1050 }
4775fba1 1051 if (vpid == -1) // done
01e71852 1052 break;
4775fba1
SH
1053 v = '0';
1054 cred.pid = vpid;
01e71852 1055 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
4775fba1
SH
1056 v = '1';
1057 cred.pid = getpid();
01e71852 1058 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
4775fba1
SH
1059 exit(1);
1060 }
1061 }
1062 exit(0);
1063}
1064
/*
 * pid_from_ns_wrapper: setns() into @tpid's pid namespace, then fork a
 * child that runs pid_from_ns(). The fork is needed because only
 * children created after setns() are actually inside the target pidns.
 *
 * Never returns. Exits 0 only if the forked child exited successfully,
 * and 1 on any setup failure or if the child failed.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1;
	char fnam[100];
	pid_t cpid;
	int len;

	len = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (len < 0 || (size_t)len >= sizeof(fnam))
		exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		exit(1);
	if (setns(newnsfd, 0) < 0)
		exit(1);
	close(newnsfd);

	cpid = fork();

	if (cpid < 0)
		exit(1);
	if (!cpid)
		pid_from_ns(sock, tpid);
	/* wait_for_pid() returns 0 on success, so non-zero is the failure. */
	if (wait_for_pid(cpid) != 0)
		exit(1);
	exit(0);
}
1089
1090static bool do_write_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, const char *buf)
1091{
1092 int sock[2] = {-1, -1};
1093 pid_t qpid, cpid = -1;
1094 bool answer = false, fail = false;
1095
1096 /*
1097 * write the pids to a socket, have helper in writer's pidns
1098 * call movepid for us
1099 */
1100 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1101 perror("socketpair");
1102 exit(1);
1103 }
1104
1105 cpid = fork();
1106 if (cpid == -1)
1107 goto out;
1108
1109 if (!cpid) // child
1110 pid_from_ns_wrapper(sock[1], tpid);
1111
1112 const char *ptr = buf;
1113 while (sscanf(ptr, "%d", &qpid) == 1) {
1114 struct ucred cred;
1115 char v;
1116
1117 if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1118 fprintf(stderr, "%s: error writing pid to child: %s\n",
1119 __func__, strerror(errno));
4775fba1
SH
1120 goto out;
1121 }
1122
01e71852
SH
1123 if (recv_creds(sock[0], &cred, &v)) {
1124 if (v == '0') {
1125 if (!cgm_move_pid(contrl, cg, cred.pid))
1126 fail = true;
1127 }
4775fba1
SH
1128 }
1129
1130 ptr = strchr(ptr, '\n');
1131 if (!ptr)
1132 break;
1133 ptr++;
1134 }
1135
1136 /* All good, write the value */
1137 qpid = -1;
1138 if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
1420baf8 1139 fprintf(stderr, "Warning: failed to ask child to exit\n");
4775fba1
SH
1140
1141 if (!fail)
1142 answer = true;
1143
1144out:
1145 if (cpid != -1)
1146 wait_for_pid(cpid);
1147 if (sock[0] != -1) {
1148 close(sock[0]);
1149 close(sock[1]);
1150 }
1151 return answer;
1152}
1153
2ad6d2bd
SH
1154int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1155 struct fuse_file_info *fi)
1156{
1157 nih_local char *controller = NULL;
1158 const char *cgroup;
1159 char *fpath = NULL, *path1, *path2;
1160 struct fuse_context *fc = fuse_get_context();
1161 nih_local char * cgdir = NULL;
1162 nih_local struct cgm_keys *k = NULL;
47cbf0e5 1163 nih_local char *localbuf = NULL;
2ad6d2bd 1164
2ad6d2bd 1165 if (offset)
f9a05025 1166 return -EINVAL;
2ad6d2bd
SH
1167
1168 if (!fc)
1169 return -EIO;
1170
47cbf0e5
SH
1171 localbuf = NIH_MUST( nih_alloc(NULL, size+1) );
1172 localbuf[size] = '\0';
1173 memcpy(localbuf, buf, size);
2ad6d2bd
SH
1174 controller = pick_controller_from_path(fc, path);
1175 if (!controller)
f9a05025 1176 return -EINVAL;
2ad6d2bd
SH
1177 cgroup = find_cgroup_in_path(path);
1178 if (!cgroup)
1179 return -EINVAL;
1180
1181 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1182 if (!fpath) {
1183 path1 = "/";
1184 path2 = cgdir;
1185 } else {
1186 path1 = cgdir;
1187 path2 = fpath;
1188 }
1189
1190 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
4775fba1
SH
1191 bool r;
1192
2ad6d2bd 1193 if (!fc_may_access(fc, controller, path1, path2, O_WRONLY))
f9a05025 1194 return -EACCES;
2ad6d2bd 1195
4775fba1
SH
1196 if (strcmp(path2, "tasks") == 0 ||
1197 strcmp(path2, "/tasks") == 0 ||
1198 strcmp(path2, "/cgroup.procs") == 0 ||
1199 strcmp(path2, "cgroup.procs") == 0)
1200 // special case - we have to translate the pids
47cbf0e5 1201 r = do_write_pids(fc->pid, controller, path1, path2, localbuf);
4775fba1 1202 else
47cbf0e5 1203 r = cgm_set_value(controller, path1, path2, localbuf);
4775fba1
SH
1204
1205 if (!r)
2ad6d2bd
SH
1206 return -EINVAL;
1207
1208 return size;
1209 }
1210
1211 return -EINVAL;
1212}
1213
341b21ad
SH
1214int cg_chown(const char *path, uid_t uid, gid_t gid)
1215{
1216 struct fuse_context *fc = fuse_get_context();
1217 nih_local char * cgdir = NULL;
1218 char *fpath = NULL, *path1, *path2;
1219 nih_local struct cgm_keys *k = NULL;
1220 const char *cgroup;
1221 nih_local char *controller = NULL;
1222
1223
1224 if (!fc)
1225 return -EIO;
1226
1227 if (strcmp(path, "/cgroup") == 0)
1228 return -EINVAL;
1229
1230 controller = pick_controller_from_path(fc, path);
1231 if (!controller)
f9a05025 1232 return -EINVAL;
341b21ad
SH
1233 cgroup = find_cgroup_in_path(path);
1234 if (!cgroup)
1235 /* this is just /cgroup/controller */
1236 return -EINVAL;
1237
1238 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1239
1240 if (!fpath) {
1241 path1 = "/";
1242 path2 = cgdir;
1243 } else {
1244 path1 = cgdir;
1245 path2 = fpath;
1246 }
1247
1248 if (is_child_cgroup(controller, path1, path2)) {
1249 // get uid, gid, from '/tasks' file and make up a mode
1250 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1251 k = get_cgroup_key(controller, cgroup, "tasks");
1252
1253 } else
1254 k = get_cgroup_key(controller, path1, path2);
1255
1256 if (!k)
1257 return -EINVAL;
1258
1259 /*
1260 * This being a fuse request, the uid and gid must be valid
1261 * in the caller's namespace. So we can just check to make
1262 * sure that the caller is root in his uid, and privileged
1263 * over the file's current owner.
1264 */
1265 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD))
f9a05025 1266 return -EACCES;
341b21ad
SH
1267
1268 if (!cgm_chown_file(controller, cgroup, uid, gid))
1269 return -EINVAL;
1270 return 0;
1271}
2ad6d2bd 1272
fd2e4e03
SH
1273int cg_chmod(const char *path, mode_t mode)
1274{
0a1bb5ea
SH
1275 struct fuse_context *fc = fuse_get_context();
1276 nih_local char * cgdir = NULL;
1277 char *fpath = NULL, *path1, *path2;
1278 nih_local struct cgm_keys *k = NULL;
1279 const char *cgroup;
1280 nih_local char *controller = NULL;
1281
1282 if (!fc)
1283 return -EIO;
1284
1285 if (strcmp(path, "/cgroup") == 0)
1286 return -EINVAL;
1287
1288 controller = pick_controller_from_path(fc, path);
1289 if (!controller)
f9a05025 1290 return -EINVAL;
0a1bb5ea
SH
1291 cgroup = find_cgroup_in_path(path);
1292 if (!cgroup)
1293 /* this is just /cgroup/controller */
1294 return -EINVAL;
1295
1296 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1297
1298 if (!fpath) {
1299 path1 = "/";
1300 path2 = cgdir;
1301 } else {
1302 path1 = cgdir;
1303 path2 = fpath;
1304 }
1305
1306 if (is_child_cgroup(controller, path1, path2)) {
1307 // get uid, gid, from '/tasks' file and make up a mode
1308 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1309 k = get_cgroup_key(controller, cgroup, "tasks");
1310
1311 } else
1312 k = get_cgroup_key(controller, path1, path2);
1313
1314 if (!k)
1315 return -EINVAL;
1316
1317 /*
1318 * This being a fuse request, the uid and gid must be valid
1319 * in the caller's namespace. So we can just check to make
1320 * sure that the caller is root in his uid, and privileged
1321 * over the file's current owner.
1322 */
1323 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT))
1324 return -EPERM;
1325
1326 if (!cgm_chmod_file(controller, cgroup, mode))
1327 return -EINVAL;
1328 return 0;
fd2e4e03
SH
1329}
1330
ab54b798
SH
1331int cg_mkdir(const char *path, mode_t mode)
1332{
1333 struct fuse_context *fc = fuse_get_context();
1334 nih_local struct cgm_keys **list = NULL;
1335 char *fpath = NULL, *path1;
1336 nih_local char * cgdir = NULL;
1337 const char *cgroup;
1338 nih_local char *controller = NULL;
1339
ab54b798
SH
1340 if (!fc)
1341 return -EIO;
1342
1343
1344 controller = pick_controller_from_path(fc, path);
1345 if (!controller)
f9a05025 1346 return -EINVAL;
ab54b798
SH
1347
1348 cgroup = find_cgroup_in_path(path);
1349 if (!cgroup)
f9a05025 1350 return -EINVAL;
ab54b798
SH
1351
1352 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1353 if (!fpath)
1354 path1 = "/";
1355 else
1356 path1 = cgdir;
1357
1358 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR))
f9a05025 1359 return -EACCES;
ab54b798
SH
1360
1361
1362 if (!cgm_create(controller, cgroup, fc->uid, fc->gid))
1363 return -EINVAL;
1364
1365 return 0;
1366}
1367
50d8d5b5
SH
1368static int cg_rmdir(const char *path)
1369{
1370 struct fuse_context *fc = fuse_get_context();
1371 nih_local struct cgm_keys **list = NULL;
1372 char *fpath = NULL;
1373 nih_local char * cgdir = NULL;
1374 const char *cgroup;
1375 nih_local char *controller = NULL;
1376
1377 if (!fc)
1378 return -EIO;
1379
1380
1381 controller = pick_controller_from_path(fc, path);
1382 if (!controller)
f9a05025 1383 return -EINVAL;
50d8d5b5
SH
1384
1385 cgroup = find_cgroup_in_path(path);
1386 if (!cgroup)
f9a05025 1387 return -EINVAL;
50d8d5b5
SH
1388
1389 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1390 if (!fpath)
1391 return -EINVAL;
1392
1393 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY))
f9a05025 1394 return -EACCES;
50d8d5b5
SH
1395
1396 if (!cgm_remove(controller, cgroup))
1397 return -EINVAL;
1398
1399 return 0;
1400}
1401
2dc17609
SH
/* Return true when @line begins with the string @pref. */
static bool startswith(const char *line, const char *pref)
{
	return strncmp(line, pref, strlen(pref)) == 0;
}
1408
/*
 * Scan the text of a memory.stat file for the line starting with
 * "total_cache" and store its value, divided by 1024, in *v.
 * *v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	static const char key[] = "total_cache";
	char *cur, *nl;

	*v = 0;
	for (cur = memstat; *cur; cur = nl + 1) {
		if (startswith(cur, key)) {
			sscanf(cur + (sizeof(key) - 1), "%lu", v);
			*v /= 1024;
			return;
		}
		/* advance to the next line, or give up on the last one */
		nl = strchr(cur, '\n');
		if (!nl)
			return;
	}
}
1426
49878439
YY
/*
 * Look up one counter in the text of a blkio.* statistics file.
 *
 * The wanted line starts with "<major>:<minor> <iotype>"; the unsigned
 * number following that prefix is stored in *v.  *v is 0 when no line
 * matches.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32];
	char *cur, *nl;
	size_t keylen;

	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;
	for (cur = str; *cur; cur = nl + 1) {
		if (startswith(cur, key)) {
			sscanf(cur + keylen, "%lu", v);
			return;
		}
		nl = strchr(cur, '\n');
		if (!nl)
			return;
	}
}
1449
2dc17609
SH
1450static char *get_pid_cgroup(pid_t pid, const char *contrl)
1451{
1452 nih_local char *fnam = NULL;
1453 FILE *f;
1454 char *answer = NULL;
1455 char *line = NULL;
1456 size_t len = 0;
1457
1458 fnam = NIH_MUST( nih_sprintf(NULL, "/proc/%d/cgroup", pid) );
1459 if (!(f = fopen(fnam, "r")))
1460 return false;
1461
1462 while (getline(&line, &len, f) != -1) {
1463 char *c1, *c2;
1464 if (!line[0])
1465 continue;
1466 c1 = strchr(line, ':');
1467 if (!c1)
1468 goto out;
1469 c1++;
1470 c2 = strchr(c1, ':');
1471 if (!c2)
1472 goto out;
1473 *c2 = '\0';
1474 if (strcmp(c1, contrl) != 0)
1475 continue;
1476 c2++;
1477 stripnewline(c2);
1478 answer = NIH_MUST( nih_strdup(NULL, c2) );
1479 goto out;
1480 }
1481
1482out:
1483 fclose(f);
1484 free(line);
1485 return answer;
1486}
1487
758ad80c 1488/*
2ad6d2bd 1489 * FUSE ops for /proc
758ad80c 1490 */
758ad80c 1491
23ce2127
SH
1492static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1493 struct fuse_file_info *fi)
1494{
2dc17609
SH
1495 struct fuse_context *fc = fuse_get_context();
1496 nih_local char *cg = get_pid_cgroup(fc->pid, "memory");
1497 nih_local char *memlimit_str = NULL, *memusage_str = NULL, *memstat_str = NULL;
1498 unsigned long memlimit = 0, memusage = 0, cached = 0, hosttotal = 0;
1499 char *line = NULL;
1500 size_t linelen = 0, total_len = 0;
1501 FILE *f;
1502
1503 if (offset)
1504 return -EINVAL;
1505
1506 if (!cg)
1507 return 0;
1508
1509 if (!cgm_get_value("memory", cg, "memory.limit_in_bytes", &memlimit_str))
1510 return 0;
1511 if (!cgm_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
1512 return 0;
1513 if (!cgm_get_value("memory", cg, "memory.stat", &memstat_str))
1514 return 0;
1515 memlimit = strtoul(memlimit_str, NULL, 10);
1516 memusage = strtoul(memusage_str, NULL, 10);
1517 memlimit /= 1024;
1518 memusage /= 1024;
1519 get_mem_cached(memstat_str, &cached);
1520
1521 f = fopen("/proc/meminfo", "r");
1522 if (!f)
1523 return 0;
1524
1525 while (getline(&line, &linelen, f) != -1) {
1526 size_t l;
1527 char *printme, lbuf[100];
1528
1529 memset(lbuf, 0, 100);
1530 if (startswith(line, "MemTotal:")) {
1531 sscanf(line+14, "%lu", &hosttotal);
1532 if (hosttotal < memlimit)
1533 memlimit = hosttotal;
1534 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
1535 printme = lbuf;
1536 } else if (startswith(line, "MemFree:")) {
1537 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
1538 printme = lbuf;
1539 } else if (startswith(line, "MemAvailable:")) {
1540 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
1541 printme = lbuf;
1542 } else if (startswith(line, "Buffers:")) {
1543 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
1544 printme = lbuf;
1545 } else if (startswith(line, "Cached:")) {
1546 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
1547 printme = lbuf;
1548 } else if (startswith(line, "SwapCached:")) {
1549 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
1550 printme = lbuf;
1551 } else
1552 printme = line;
1553 l = snprintf(buf, size, "%s", printme);
1554 buf += l;
1555 size -= l;
1556 total_len += l;
1557 }
1558
92c84dc4
SH
1559 fclose(f);
1560 free(line);
2dc17609 1561 return total_len;
23ce2127
SH
1562}
1563
/*
 * Fetch cpuset.cpus for cgroup @cg from the "cpuset" controller.
 * Returns the value as a nih-allocated string, or NULL when
 * cgm_get_value() fails.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgm_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
1576
/*
 * Helper functions for cpu_in_cpuset
 */

/*
 * Given a pointer to the start of one entry of a comma-separated cpuset
 * list, return a pointer to the start of the following entry, or NULL
 * when @c is NULL, empty, or is the last entry.
 */
char *cpuset_nexttok(const char *c)
{
	char *r;

	/* c+1 below would step over the terminator of an empty string */
	if (!c || !*c)
		return NULL;

	r = strchr(c + 1, ',');
	if (r)
		return r + 1;
	return NULL;
}
1587
/*
 * Parse the cpuset entry at @c as "A-B".
 * Returns the sscanf() conversion count: 2 when both ends were read
 * into *a and *b, 1 when only a single number was read into *a.
 */
int cpuset_getrange(const char *c, int *a, int *b)
{
	return sscanf(c, "%d-%d", a, b);
}
1595
/*
 * cpusets are in format "1,2-3,4"
 * iow, comma-delimited entries, each a single cpu or a range.
 *
 * Returns true when @cpu is named by any entry of @cpuset.  An entry
 * that does not match is skipped and the scan continues; an entry that
 * cannot be parsed at all ends the scan with false.
 */
static bool cpu_in_cpuset(int cpu, const char *cpuset)
{
	const char *c;

	for (c = cpuset; c; c = cpuset_nexttok(c)) {
		int a, b, ret;

		ret = cpuset_getrange(c, &a, &b);
		if (ret == 1) {
			/* single cpu, e.g. "1" or "1,6" */
			if (cpu == a)
				return true;
		} else if (ret == 2) {
			/* range, e.g. "2-3" */
			if (cpu >= a && cpu <= b)
				return true;
		} else {
			/* bad cpuset! */
			return false;
		}
	}

	return false;
}
1618
aeb56147
SH
/*
 * Given a "processor : N" line from /proc/cpuinfo, report whether cpu N
 * belongs to @cpuset.  Lines of any other shape yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1 &&
	       cpu_in_cpuset(cpu, cpuset);
}
1627
23ce2127
SH
/*
 * check whether this is a "^processor : N" line in /proc/cpuinfo
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1;
}
1639
23ce2127
SH
1640static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
1641 struct fuse_file_info *fi)
1642{
1643 struct fuse_context *fc = fuse_get_context();
1644 nih_local char *cg = get_pid_cgroup(fc->pid, "cpuset");
1645 nih_local char *cpuset = NULL;
1646 char *line = NULL;
1647 size_t linelen = 0, total_len = 0;
1648 bool am_printing = false;
1649 int curcpu = -1;
1650 FILE *f;
1651
1652 if (offset)
1653 return -EINVAL;
1654
1655 if (!cg)
1656 return 0;
1657
1658 cpuset = get_cpuset(cg);
1659 if (!cpuset)
1660 return 0;
1661
1662 f = fopen("/proc/cpuinfo", "r");
1663 if (!f)
1664 return 0;
1665
1666 while (getline(&line, &linelen, f) != -1) {
1667 size_t l;
1668 if (is_processor_line(line)) {
aeb56147 1669 am_printing = cpuline_in_cpuset(line, cpuset);
23ce2127
SH
1670 if (am_printing) {
1671 curcpu ++;
1672 l = snprintf(buf, size, "processor : %d\n", curcpu);
1673 buf += l;
1674 size -= l;
1675 total_len += l;
1676 }
1677 continue;
1678 }
1679 if (am_printing) {
1680 l = snprintf(buf, size, "%s", line);
1681 buf += l;
1682 size -= l;
1683 total_len += l;
1684 }
1685 }
1686
92c84dc4
SH
1687 fclose(f);
1688 free(line);
23ce2127
SH
1689 return total_len;
1690}
1691
1692static int proc_stat_read(char *buf, size_t size, off_t offset,
1693 struct fuse_file_info *fi)
1694{
aeb56147
SH
1695 struct fuse_context *fc = fuse_get_context();
1696 nih_local char *cg = get_pid_cgroup(fc->pid, "cpuset");
1697 nih_local char *cpuset = NULL;
1698 char *line = NULL;
1699 size_t linelen = 0, total_len = 0;
2a0fde62 1700 int curcpu = -1; /* cpu numbering starts at 0 */
aeb56147
SH
1701 FILE *f;
1702
1703 if (offset)
1704 return -EINVAL;
1705
1706 if (!cg)
1707 return 0;
1708
1709 cpuset = get_cpuset(cg);
1710 if (!cpuset)
1711 return 0;
1712
1713 f = fopen("/proc/stat", "r");
1714 if (!f)
1715 return 0;
1716
1717 while (getline(&line, &linelen, f) != -1) {
1718 size_t l;
1719 int cpu;
2a0fde62 1720 char cpu_char[10]; /* That's a lot of cores */
aeb56147
SH
1721 char *c;
1722
2a0fde62
CB
1723 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
1724 /* not a ^cpuN line containing a number N, just print it */
aeb56147
SH
1725 l = snprintf(buf, size, "%s", line);
1726 buf += l;
1727 size -= l;
1728 total_len += l;
1729 continue;
1730 }
2a0fde62
CB
1731
1732 if (sscanf(cpu_char, "%d", &cpu) != 1)
1733 continue;
aeb56147
SH
1734 if (!cpu_in_cpuset(cpu, cpuset))
1735 continue;
1736 curcpu ++;
1737
1738 c = strchr(line, ' ');
1739 if (!c)
1740 continue;
1741 l = snprintf(buf, size, "cpu%d %s", curcpu, c);
1742 buf += l;
1743 size -= l;
1744 total_len += l;
1745 }
1746
92c84dc4
SH
1747 fclose(f);
1748 free(line);
aeb56147 1749 return total_len;
23ce2127
SH
1750}
1751
7bbf2246
SH
1752/*
1753 * How to guess what to present for uptime?
1754 * One thing we could do would be to take the date on the caller's
1755 * memory.usage_in_bytes file, which should equal the time of creation
1756 * of his cgroup. However, a task could be in a sub-cgroup of the
1757 * container. The same problem exists if we try to look at the ages
1758 * of processes in the caller's cgroup.
1759 *
1760 * So we'll fork a task that will enter the caller's pidns, mount a
1761 * fresh procfs, get the age of /proc/1, and pass that back over a pipe.
1762 *
1763 * For the second uptime #, we'll do as Stéphane had done, just copy
1764 * the number from /proc/uptime. Not sure how to best emulate 'idle'
1765 * time. Maybe someone can come up with a good algorithm and submit a
1766 * patch. Maybe something based on cpushare info?
1767 */
41bb9357
SH
1768
1769/* return age of the reaper for $pid, taken from ctime of its procdir */
1770static long int get_pid1_time(pid_t pid)
1771{
1772 char fnam[100];
1773 int fd;
1774 struct stat sb;
1775 int ret;
1776 pid_t npid;
1777
1778 if (unshare(CLONE_NEWNS))
1779 return 0;
1780
5ca64c2a
SG
1781 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
1782 perror("rslave mount failed");
1783 return 0;
1784 }
1785
41bb9357
SH
1786 sprintf(fnam, "/proc/%d/ns/pid", pid);
1787 fd = open(fnam, O_RDONLY);
1788 if (fd < 0) {
1789 perror("get_pid1_time open of ns/pid");
1790 return 0;
1791 }
1792 if (setns(fd, 0)) {
1793 perror("get_pid1_time setns 1");
1794 close(fd);
1795 return 0;
1796 }
1797 close(fd);
1798 npid = fork();
1799 if (npid < 0)
1800 return 0;
1801
1802 if (npid) {
1803 // child will do the writing for us
1804 wait_for_pid(npid);
1805 exit(0);
1806 }
1807
1808 umount2("/proc", MNT_DETACH);
1809
1810 if (mount("proc", "/proc", "proc", 0, NULL)) {
1811 perror("get_pid1_time mount");
1812 return 0;
1813 }
1814 ret = lstat("/proc/1", &sb);
1815 if (ret) {
1816 perror("get_pid1_time lstat");
1817 return 0;
1818 }
1819 return time(NULL) - sb.st_ctime;
1820}
1821
/*
 * Return the age, in seconds, of the reaper of @qpid's pid namespace.
 *
 * get_pid1_time() changes namespaces, so it is run in a forked helper
 * which sends the result back over a pipe.  The parent waits at most
 * one second for the answer to become readable.
 *
 * Returns 0 when the pipe or the helper cannot be created, on timeout
 * and on a short read.
 */
static long int getreaperage(pid_t qpid)
{
	int pid, mypipe[2], ret;
	struct timeval tv;
	fd_set s;
	long int mtime, answer = 0;

	if (pipe(mypipe))
		return 0;

	pid = fork();
	if (pid < 0) {
		/* no helper: nothing to wait for, just release the pipe */
		close(mypipe[0]);
		close(mypipe[1]);
		return 0;
	}

	if (!pid) { // child
		close(mypipe[0]);
		mtime = get_pid1_time(qpid);
		if (write(mypipe[1], &mtime, sizeof(mtime)) != sizeof(mtime))
			fprintf(stderr, "Warning: bad write from getreaperage\n");
		exit(0);
	}

	/* parent only reads */
	close(mypipe[1]);
	FD_ZERO(&s);
	FD_SET(mypipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(mypipe[0]+1, &s, NULL, NULL, &tv);
	if (ret == -1) {
		perror("select");
		goto out;
	}
	if (!ret) {
		fprintf(stderr, "timed out\n");
		goto out;
	}
	if (read(mypipe[0], &mtime, sizeof(mtime)) != sizeof(mtime)) {
		perror("read");
		goto out;
	}
	answer = mtime;

out:
	wait_for_pid(pid);
	close(mypipe[0]);
	return answer;
}
1867
/*
 * Return the second field of /proc/uptime (the idle time), truncated to
 * whole seconds, or 0 when the file cannot be opened or parsed.
 *
 * Both fields are written with a fractional part ("12345.67 8901.23"),
 * so they have to be read as floating point: an integer conversion
 * stops at the first '.' and the second field is then never matched.
 */
static long int getprocidle(void)
{
	FILE *f = fopen("/proc/uptime", "r");
	double age, idle;
	int ret;

	if (!f)
		return 0;
	ret = fscanf(f, "%lf %lf", &age, &idle);
	fclose(f);
	if (ret != 2)
		return 0;
	return (long int)idle;
}
1881
1882/*
1883 * We read /proc/uptime and reuse its second field.
1884 * For the first field, we use the mtime for the reaper for
1885 * the calling pid as returned by getreaperage
1886 */
23ce2127
SH
1887static int proc_uptime_read(char *buf, size_t size, off_t offset,
1888 struct fuse_file_info *fi)
1889{
41bb9357
SH
1890 struct fuse_context *fc = fuse_get_context();
1891 long int reaperage = getreaperage(fc->pid);;
1892 long int idletime = getprocidle();
1893
1894 if (offset)
1895 return -EINVAL;
1896 return snprintf(buf, size, "%ld %ld\n", reaperage, idletime);
23ce2127
SH
1897}
1898
49878439
YY
1899static int proc_diskstats_read(char *buf, size_t size, off_t offset,
1900 struct fuse_file_info *fi)
1901{
1902 char dev_name[72];
1903 struct fuse_context *fc = fuse_get_context();
1904 nih_local char *cg = get_pid_cgroup(fc->pid, "blkio");
1905 nih_local char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
1906 *io_wait_time_str = NULL, *io_service_time_str = NULL;
1907 unsigned long read = 0, write = 0;
1908 unsigned long read_merged = 0, write_merged = 0;
1909 unsigned long read_sectors = 0, write_sectors = 0;
1910 unsigned long read_ticks = 0, write_ticks = 0;
1911 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
1912 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
1913 char *line = NULL;
1914 size_t linelen = 0, total_len = 0;
1915 unsigned int major = 0, minor = 0;
1916 int i = 0;
1917 FILE *f;
1918
1919 if (offset)
1920 return -EINVAL;
1921
1922 if (!cg)
1923 return 0;
1924
1925 if (!cgm_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
1926 return 0;
1927 if (!cgm_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
1928 return 0;
1929 if (!cgm_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
1930 return 0;
1931 if (!cgm_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
1932 return 0;
1933 if (!cgm_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
1934 return 0;
1935
1936
1937 f = fopen("/proc/diskstats", "r");
1938 if (!f)
1939 return 0;
1940
1941 while (getline(&line, &linelen, f) != -1) {
1942 size_t l;
1943 char *printme, lbuf[256];
1944
1945 i = sscanf(line, "%u %u %s", &major, &minor, dev_name);
1946 if(i == 3){
1947 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
1948 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
1949 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
1950 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
1951 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
1952 read_sectors = read_sectors/512;
1953 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
1954 write_sectors = write_sectors/512;
1955
1956 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
1957 rd_svctm = rd_svctm/1000000;
1958 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
1959 rd_wait = rd_wait/1000000;
1960 read_ticks = rd_svctm + rd_wait;
1961
1962 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
1963 wr_svctm = wr_svctm/1000000;
1964 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
1965 wr_wait = wr_wait/1000000;
1966 write_ticks = wr_svctm + wr_wait;
1967
1968 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
1969 tot_ticks = tot_ticks/1000000;
1970 }else{
1971 continue;
1972 }
1973
1974 memset(lbuf, 0, 256);
1975 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
1976 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
1977 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
1978 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
1979 printme = lbuf;
1980 } else
1981 continue;
1982
1983 l = snprintf(buf, size, "%s", printme);
1984 buf += l;
1985 size -= l;
1986 total_len += l;
1987 }
1988
1989 fclose(f);
1990 free(line);
1991 return total_len;
1992}
1993
23ce2127
SH
/*
 * Return the number of bytes obtained by reading the file @which line
 * by line until getline() fails, or 0 when the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	FILE *f = fopen(which, "r");
	char *linebuf = NULL;
	size_t cap = 0;
	ssize_t got, total = 0;

	if (!f)
		return 0;

	for (;;) {
		got = getline(&linebuf, &cap, f);
		if (got == -1)
			break;
		total += got;
	}

	fclose(f);
	free(linebuf);

	return total;
}
2010
758ad80c
SH
2011static int proc_getattr(const char *path, struct stat *sb)
2012{
35629743
SH
2013 struct timespec now;
2014
2015 memset(sb, 0, sizeof(struct stat));
2016 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
2017 return -EINVAL;
2018 sb->st_uid = sb->st_gid = 0;
2019 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
2020 if (strcmp(path, "/proc") == 0) {
2021 sb->st_mode = S_IFDIR | 00555;
2022 sb->st_nlink = 2;
2023 return 0;
2024 }
2025 if (strcmp(path, "/proc/meminfo") == 0 ||
2026 strcmp(path, "/proc/cpuinfo") == 0 ||
2027 strcmp(path, "/proc/uptime") == 0 ||
49878439
YY
2028 strcmp(path, "/proc/stat") == 0 ||
2029 strcmp(path, "/proc/diskstats") == 0) {
23ce2127 2030 sb->st_size = get_procfile_size(path);
35629743
SH
2031 sb->st_mode = S_IFREG | 00444;
2032 sb->st_nlink = 1;
2033 return 0;
2034 }
2035
2036 return -ENOENT;
2037}
2038
2039static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2040 struct fuse_file_info *fi)
2041{
2042 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2043 filler(buf, "meminfo", NULL, 0) != 0 ||
2044 filler(buf, "stat", NULL, 0) != 0 ||
49878439
YY
2045 filler(buf, "uptime", NULL, 0) != 0 ||
2046 filler(buf, "diskstats", NULL, 0) != 0)
758ad80c 2047 return -EINVAL;
758ad80c
SH
2048 return 0;
2049}
2050
35629743
SH
/*
 * open for files below /proc: succeeds (0) for the emulated files and
 * fails with -ENOENT for every other path.  No per-open state is kept.
 */
static int proc_open(const char *path, struct fuse_file_info *fi)
{
	static const char *const files[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	size_t i;

	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
		if (strcmp(path, files[i]) == 0)
			return 0;
	}
	return -ENOENT;
}
2061
35629743
SH
2062static int proc_read(const char *path, char *buf, size_t size, off_t offset,
2063 struct fuse_file_info *fi)
2064{
2065 if (strcmp(path, "/proc/meminfo") == 0)
23ce2127 2066 return proc_meminfo_read(buf, size, offset, fi);
35629743 2067 if (strcmp(path, "/proc/cpuinfo") == 0)
23ce2127 2068 return proc_cpuinfo_read(buf, size, offset, fi);
35629743 2069 if (strcmp(path, "/proc/uptime") == 0)
23ce2127 2070 return proc_uptime_read(buf, size, offset, fi);
35629743 2071 if (strcmp(path, "/proc/stat") == 0)
23ce2127 2072 return proc_stat_read(buf, size, offset, fi);
49878439
YY
2073 if (strcmp(path, "/proc/diskstats") == 0)
2074 return proc_diskstats_read(buf, size, offset, fi);
35629743
SH
2075 return -EINVAL;
2076}
2077
2ad6d2bd
SH
2078/*
2079 * FUSE ops for /
2080 * these just delegate to the /proc and /cgroup ops as
2081 * needed
2082 */
758ad80c
SH
2083
2084static int lxcfs_getattr(const char *path, struct stat *sb)
2085{
2086 if (strcmp(path, "/") == 0) {
2087 sb->st_mode = S_IFDIR | 00755;
2088 sb->st_nlink = 2;
2089 return 0;
2090 }
2091 if (strncmp(path, "/cgroup", 7) == 0) {
2092 return cg_getattr(path, sb);
2093 }
35629743 2094 if (strncmp(path, "/proc", 5) == 0) {
758ad80c
SH
2095 return proc_getattr(path, sb);
2096 }
2097 return -EINVAL;
2098}
2099
/*
 * Top-level opendir: "/" and "/proc" need no state, /cgroup paths are
 * delegated to cg_opendir().  Returns -ENOENT for anything else.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_opendir(path, fi);

	return -ENOENT;
}
2112
2113static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2114 struct fuse_file_info *fi)
2115{
2116 if (strcmp(path, "/") == 0) {
2117 if (filler(buf, "proc", NULL, 0) != 0 ||
2118 filler(buf, "cgroup", NULL, 0) != 0)
2119 return -EINVAL;
2120 return 0;
2121 }
35629743 2122 if (strncmp(path, "/cgroup", 7) == 0)
758ad80c 2123 return cg_readdir(path, buf, filler, offset, fi);
35629743
SH
2124 if (strcmp(path, "/proc") == 0)
2125 return proc_readdir(path, buf, filler, offset, fi);
758ad80c
SH
2126 return -EINVAL;
2127}
2128
/*
 * Top-level releasedir: nothing to release for "/" and "/proc", /cgroup
 * paths are delegated to cg_releasedir().  Returns -EINVAL otherwise.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_releasedir(path, fi);

	return -EINVAL;
}
2140
99978832
SH
/*
 * Top-level open: dispatch on the path prefix to cg_open() or
 * proc_open().  Returns -EINVAL for paths outside both trees.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;
	bool in_proc = strncmp(path, "/proc", 5) == 0;

	if (in_cgroup)
		return cg_open(path, fi);
	if (in_proc)
		return proc_open(path, fi);

	return -EINVAL;
}
2150
/*
 * Top-level read: dispatch on the path prefix to cg_read() or
 * proc_read().  Returns -EINVAL for paths outside both trees.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;
	bool in_proc = strncmp(path, "/proc", 5) == 0;

	if (in_cgroup)
		return cg_read(path, buf, size, offset, fi);
	if (in_proc)
		return proc_read(path, buf, size, offset, fi);

	return -EINVAL;
}
2161
2ad6d2bd
SH
/*
 * Top-level write: only files below /cgroup are writable, they are
 * handled by cg_write().  Returns -EINVAL for every other path.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
	     struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
2171
99978832
SH
/* flush: nothing is buffered, so this always succeeds. */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
2176
/* release: no per-open state exists, so this always succeeds. */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
2181
/* fsync: there is nothing to synchronise, so this always succeeds. */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
2186
ab54b798
SH
/*
 * Top-level mkdir: directories can only be created below /cgroup
 * (see cg_mkdir()).  Returns -EINVAL for every other path.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
2194
341b21ad
SH
/*
 * Top-level chown: only entries below /cgroup can change owner
 * (see cg_chown()).  Returns -EINVAL for every other path.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
2202
2ad6d2bd
SH
/*
 * cat first does a truncate before doing ops->write.  This doesn't
 * really make sense for cgroups.  So for paths below /cgroup we always
 * report success without doing anything; every other path is -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	(void)newsize;

	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
2214
50d8d5b5
SH
/*
 * Top-level rmdir: only directories below /cgroup can be removed
 * (see cg_rmdir()).  Returns -EINVAL for every other path.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
2221
fd2e4e03
SH
/*
 * Top-level chmod: only entries below /cgroup can change mode
 * (see cg_chmod()).  Returns -EINVAL for every other path.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
2228
758ad80c
SH
2229const struct fuse_operations lxcfs_ops = {
2230 .getattr = lxcfs_getattr,
2231 .readlink = NULL,
2232 .getdir = NULL,
2233 .mknod = NULL,
ab54b798 2234 .mkdir = lxcfs_mkdir,
758ad80c 2235 .unlink = NULL,
50d8d5b5 2236 .rmdir = lxcfs_rmdir,
758ad80c
SH
2237 .symlink = NULL,
2238 .rename = NULL,
2239 .link = NULL,
fd2e4e03 2240 .chmod = lxcfs_chmod,
341b21ad 2241 .chown = lxcfs_chown,
2ad6d2bd 2242 .truncate = lxcfs_truncate,
758ad80c 2243 .utime = NULL,
99978832
SH
2244
2245 .open = lxcfs_open,
2246 .read = lxcfs_read,
2247 .release = lxcfs_release,
2ad6d2bd 2248 .write = lxcfs_write,
99978832 2249
758ad80c 2250 .statfs = NULL,
99978832
SH
2251 .flush = lxcfs_flush,
2252 .fsync = lxcfs_fsync,
758ad80c
SH
2253
2254 .setxattr = NULL,
2255 .getxattr = NULL,
2256 .listxattr = NULL,
2257 .removexattr = NULL,
2258
2259 .opendir = lxcfs_opendir,
2260 .readdir = lxcfs_readdir,
2261 .releasedir = lxcfs_releasedir,
2262
2263 .fsyncdir = NULL,
2264 .init = NULL,
2265 .destroy = NULL,
2266 .access = NULL,
2267 .create = NULL,
2268 .ftruncate = NULL,
2269 .fgetattr = NULL,
2270};
2271
/* Print the command line synopsis for program @me to stderr and exit(1). */
static void usage(const char *me)
{
	fprintf(stderr,
		"Usage:\n"
		"\n"
		"%s [FUSE and mount options] mountpoint\n",
		me);
	exit(1);
}
2279
/* Return true when @w is one of the accepted spellings of "help". */
static bool is_help(char *w)
{
	static const char *const spellings[] = {
		"-h", "--help", "-help", "help",
	};
	size_t i;

	for (i = 0; i < sizeof(spellings) / sizeof(spellings[0]); i++) {
		if (strcmp(w, spellings[i]) == 0)
			return true;
	}
	return false;
}
2289
2290int main(int argc, char *argv[])
2291{
2292 int ret;
2293 struct lxcfs_state *d;
2294
2295 if (argc < 2 || is_help(argv[1]))
2296 usage(argv[0]);
2297
2298 d = malloc(sizeof(*d));
2299 if (!d)
2300 return -1;
2301
2302 if (!cgm_escape_cgroup())
2303 fprintf(stderr, "WARNING: failed to escape to root cgroup\n");
2304
2305 if (!cgm_get_controllers(&d->subsystems))
2306 return -1;
2307
2308 ret = fuse_main(argc, argv, &lxcfs_ops, d);
2309
2310 return ret;
2183082c 2311}