]> git.proxmox.com Git - mirror_lxcfs.git/blame - lxcfs.c
remove stale comments, update some
[mirror_lxcfs.git] / lxcfs.c
CommitLineData
758ad80c
SH
1/* lxcfs
2 *
2c51f8dd 3 * Copyright © 2014,2015 Canonical, Inc
758ad80c
SH
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
f2799430 6 * See COPYING file for details.
758ad80c
SH
7 */
8
758ad80c
SH
9#define FUSE_USE_VERSION 26
10
2183082c 11#include <stdio.h>
758ad80c
SH
12#include <dirent.h>
13#include <fcntl.h>
14#include <fuse.h>
15#include <unistd.h>
16#include <errno.h>
17#include <stdbool.h>
18#include <time.h>
19#include <string.h>
20#include <stdlib.h>
21#include <libgen.h>
41bb9357
SH
22#include <sched.h>
23#include <linux/sched.h>
a05660a6 24#include <sys/socket.h>
41bb9357
SH
25#include <sys/mount.h>
26#include <wait.h>
758ad80c 27
758ad80c 28#include "cgmanager.h"
2e9c0b32 29#include "config.h" // for VERSION
758ad80c
SH
30
struct lxcfs_state {
	/*
	 * NULL-terminated array naming every mounted cgroup subsystem.
	 * It is filled in once, when the filesystem starts up.
	 */
	char **subsystems;
};
38#define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
39
443d13f5
SH
/* Values stored in file_info.type to tell the kinds of open handle apart. */
enum {
	LXC_TYPE_CGDIR = 0,		/* set by cg_opendir() */
	LXC_TYPE_CGFILE,		/* set by cg_open() */
	LXC_TYPE_PROC_MEMINFO,
	LXC_TYPE_PROC_CPUINFO,
	LXC_TYPE_PROC_UPTIME,
	LXC_TYPE_PROC_STAT,
	LXC_TYPE_PROC_DISKSTATS,
};
49
c688e1b3
SH
/*
 * Per-open-handle state kept in fuse_file_info.fh.  The three strings and
 * buf are heap allocations released by do_release_file_info().
 */
struct file_info {
	char *controller;	/* controller name, or NULL */
	char *cgroup;		/* cgroup path, or NULL */
	char *file;		/* key name for file handles, NULL for dirs */
	int type;		/* one of the LXC_TYPE_* values */
	char *buf;		/* not used yet */
	int buflen;
	int size;		/* actual data size */
};
59
97f1f27b
YY
/* reserve buffer size, for cpuall in /proc/stat */
#define BUF_RESERVE_SIZE 256

/*
 * Append "<pid>\n" to the heap string *src, growing it when needed.
 *
 * src: pointer to the char* to append to; *src may be NULL, in which
 *      case a fresh buffer is allocated.
 * sz:  number of characters stored so far, not counting the trailing \0.
 * asz: number of bytes currently allocated for *src.
 * pid: the pid to append.
 *
 * Allocation failures are retried until they succeed, so on return *src
 * is always a valid NUL-terminated string and *sz / *asz are up to date.
 */
static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
{
	char *d = *src;
	char tmp[30];
	size_t len;

	snprintf(tmp, sizeof(tmp), "%d\n", (int)pid);
	len = strlen(tmp);

	if (!d) {
		do {
			d = malloc(BUF_RESERVE_SIZE);
		} while (!d);
		*src = d;
		*asz = BUF_RESERVE_SIZE;
	} else if (*sz + len + 1 >= *asz) {
		/*
		 * Compare the stored sizes, not the pointers to them.  Keep
		 * the old block in d until realloc() succeeds so a failed
		 * attempt neither leaks nor loses what was already written.
		 */
		char *grown;

		do {
			grown = realloc(d, *asz + BUF_RESERVE_SIZE);
		} while (!grown);
		d = grown;
		*src = d;
		*asz += BUF_RESERVE_SIZE;
	}
	memcpy(d + *sz, tmp, len);
	*sz += len;
	d[*sz] = '\0';
}
94
/*
 * Return a heap copy of str, retrying strdup() until it succeeds.
 * A NULL str gives NULL.  parent is not used.  The caller frees the copy.
 */
static char *must_copy_string(void *parent, const char *str)
{
	char *copy;

	if (str == NULL)
		return NULL;

	for (;;) {
		copy = strdup(str);
		if (copy != NULL)
			return copy;
	}
}
106
a05660a6
SH
/*
 * Reap child pid, restarting waitpid() when it is interrupted.
 * Returns 0 only if the child exited normally with status 0; returns -1
 * on any other exit or if waitpid() fails.
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}
		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;
	return -1;
}
124
053a659d
SH
/*
 * Translate an id through an open /proc/pid/{u,g}id_map stream.
 *
 * idfile is rewound and read line by line; each line that parses as
 * "<ns base> <host base> <count>" describes one mapped range.  in_id is
 * an id in the caller's (host) view.  Returns the matching id inside the
 * namespace, or (unsigned int)-1 if no range contains in_id or a range
 * wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char line[400];

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, sizeof(line), idfile) != NULL) {
		unsigned int ns_base, host_base, range;

		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range) != 3)
			continue;

		/* a range that overflows is not expected from procfs: bail */
		if (host_base + range < host_base || ns_base + range < ns_base) {
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				ns_base, host_base, range, line);
			return -1;
		}

		/*
		 * Neither range wraps, so when in_id lies inside the host
		 * range the offset added to ns_base cannot wrap either.
		 */
		if (in_id >= host_base && in_id < host_base + range)
			return ns_base + (in_id - host_base);
	}

	/* no range matched */
	return -1;
}
168
341b21ad
SH
169/*
170 * for is_privileged_over,
171 * specify whether we require the calling uid to be root in his
172 * namespace
173 */
174#define NS_ROOT_REQD true
175#define NS_ROOT_OPT false
176
2c51f8dd
SH
177#define PROCLEN 100
178
341b21ad 179static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
758ad80c 180{
2c51f8dd
SH
181 char fpath[PROCLEN];
182 int ret;
053a659d
SH
183 bool answer = false;
184 uid_t nsuid;
185
341b21ad
SH
186 if (victim == -1 || uid == -1)
187 return false;
188
189 /*
190 * If the request is one not requiring root in the namespace,
191 * then having the same uid suffices. (i.e. uid 1000 has write
192 * access to files owned by uid 1000
193 */
194 if (!req_ns_root && uid == victim)
758ad80c
SH
195 return true;
196
2c51f8dd
SH
197 ret = snprintf(fpath, PROCLEN, "/proc/%d/uid_map", pid);
198 if (ret < 0 || ret >= PROCLEN)
199 return false;
053a659d
SH
200 FILE *f = fopen(fpath, "r");
201 if (!f)
202 return false;
203
341b21ad 204 /* if caller's not root in his namespace, reject */
053a659d
SH
205 nsuid = convert_id_to_ns(f, uid);
206 if (nsuid)
207 goto out;
208
341b21ad
SH
209 /*
210 * If victim is not mapped into caller's ns, reject.
211 * XXX I'm not sure this check is needed given that fuse
212 * will be sending requests where the vfs has converted
213 */
053a659d
SH
214 nsuid = convert_id_to_ns(f, victim);
215 if (nsuid == -1)
216 goto out;
217
218 answer = true;
219
220out:
221 fclose(f);
222 return answer;
758ad80c
SH
223}
224
/*
 * Check the "other" permission bits of fmode against an open(2) access
 * mode.  Callers shift owner or group bits down into the "other"
 * position first.  Returns true when every bit needed by req_mode's
 * access mode (O_RDONLY, O_WRONLY or O_RDWR) is set in fmode.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	int access = req_mode & O_ACCMODE;
	mode_t wanted;

	if (access == O_RDONLY)
		wanted = S_IROTH;
	else if (access == O_WRONLY)
		wanted = S_IWOTH;
	else if (access == O_RDWR)
		wanted = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & wanted) == wanted;
}
244
3db25a35
SH
/*
 * Return a heap copy of the first path component of taskcg that lies
 * below querycg.  taskcg must be strictly longer than querycg, otherwise
 * a message is printed and NULL is returned.  NULL is also returned if
 * the copy cannot be allocated.  The caller frees the result.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t querylen = strlen(querycg);
	size_t skip;
	char *component;

	if (strlen(taskcg) <= querylen) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* for the root cgroup only the leading '/' is skipped */
	skip = strcmp(querycg, "/") == 0 ? 1 : querylen + 1;

	component = strdup(taskcg + skip);
	if (component == NULL)
		return NULL;

	/* cut at the first '/', if there is one */
	component[strcspn(component, "/")] = '\0';
	return component;
}
265
2c51f8dd
SH
/* Remove one trailing '\n' from x in place, if present. */
static void stripnewline(char *x)
{
	size_t len = strlen(x);

	if (len == 0)
		return;
	if (x[len - 1] == '\n')
		x[len - 1] = '\0';
}
272
273static char *get_pid_cgroup(pid_t pid, const char *contrl)
274{
275 char fnam[PROCLEN];
276 FILE *f;
277 char *answer = NULL;
278 char *line = NULL;
279 size_t len = 0;
280 int ret;
281
282 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
283 if (ret < 0 || ret >= PROCLEN)
284 return NULL;
285 if (!(f = fopen(fnam, "r")))
286 return NULL;
287
288 while (getline(&line, &len, f) != -1) {
289 char *c1, *c2;
290 if (!line[0])
291 continue;
292 c1 = strchr(line, ':');
293 if (!c1)
294 goto out;
295 c1++;
296 c2 = strchr(c1, ':');
297 if (!c2)
298 goto out;
299 *c2 = '\0';
300 if (strcmp(c1, contrl) != 0)
301 continue;
302 c2++;
303 stripnewline(c2);
304 do {
305 answer = strdup(c2);
306 } while (!answer);
307 break;
308 }
309
310out:
311 fclose(f);
312 free(line);
313 return answer;
314}
315
758ad80c
SH
316/*
317 * check whether a fuse context may access a cgroup dir or file
318 *
319 * If file is not null, it is a cgroup file to check under cg.
320 * If file is null, then we are checking perms on cg itself.
321 *
322 * For files we can check the mode of the list_keys result.
323 * For cgroups, we must make assumptions based on the files under the
324 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
325 * yet.
326 */
327static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
328{
2c51f8dd
SH
329 struct cgm_keys **list = NULL;
330 bool ret = false;
758ad80c
SH
331 int i;
332
333 if (!file)
334 file = "tasks";
335
336 if (*file == '/')
337 file++;
338
339 if (!cgm_list_keys(contrl, cg, &list))
340 return false;
341 for (i = 0; list[i]; i++) {
342 if (strcmp(list[i]->name, file) == 0) {
343 struct cgm_keys *k = list[i];
341b21ad 344 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
2c51f8dd
SH
345 if (perms_include(k->mode >> 6, mode)) {
346 ret = true;
347 goto out;
348 }
758ad80c
SH
349 }
350 if (fc->gid == k->gid) {
2c51f8dd
SH
351 if (perms_include(k->mode >> 3, mode)) {
352 ret = true;
353 goto out;
354 }
758ad80c 355 }
2c51f8dd
SH
356 ret = perms_include(k->mode, mode);
357 goto out;
758ad80c
SH
358 }
359 }
360
2c51f8dd
SH
361out:
362 free_keys(list);
363 return ret;
3db25a35
SH
364}
365
04b5cbdc
SH
#define INITSCOPE "/init.scope"
/*
 * Strip a trailing "/init.scope" component from cg, in place.
 *
 * If cg is exactly "/init.scope" it becomes "/"; otherwise the suffix is
 * cut off.  Strings that do not end in "/init.scope", including ones
 * shorter than the suffix, are left untouched.
 */
static void prune_init_slice(char *cg)
{
	size_t cg_len = strlen(cg);
	size_t scope_len = strlen(INITSCOPE);
	char *point;

	/*
	 * Compare lengths before doing any pointer arithmetic: computing
	 * cg + cg_len - scope_len for a short string would form a pointer
	 * before the start of the buffer.
	 */
	if (cg_len < scope_len)
		return;

	point = cg + (cg_len - scope_len);
	if (strcmp(point, INITSCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';	/* keep the leading '/' */
	else
		*point = '\0';
}
380
3db25a35
SH
381/*
382 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
383 * If caller is in /a, he may act on /a/b, but not on /b.
384 * if the answer is false and nextcg is not NULL, then *nextcg will point
2c51f8dd
SH
385 * to a string containing the next cgroup directory under cg, which must be
386 * freed by the caller.
3db25a35
SH
387 */
388static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
389{
2c51f8dd 390 char fnam[PROCLEN];
3db25a35
SH
391 FILE *f;
392 bool answer = false;
393 char *line = NULL;
394 size_t len = 0;
2c51f8dd 395 int ret;
3db25a35 396
2c51f8dd
SH
397 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
398 if (ret < 0 || ret >= PROCLEN)
399 return false;
3db25a35
SH
400 if (!(f = fopen(fnam, "r")))
401 return false;
402
403 while (getline(&line, &len, f) != -1) {
404 char *c1, *c2, *linecmp;
405 if (!line[0])
406 continue;
407 c1 = strchr(line, ':');
408 if (!c1)
409 goto out;
410 c1++;
411 c2 = strchr(c1, ':');
412 if (!c2)
413 goto out;
414 *c2 = '\0';
415 if (strcmp(c1, contrl) != 0)
416 continue;
417 c2++;
418 stripnewline(c2);
04b5cbdc 419 prune_init_slice(c2);
3db25a35
SH
420 /*
421 * callers pass in '/' for root cgroup, otherwise they pass
422 * in a cgroup without leading '/'
423 */
424 linecmp = *cg == '/' ? c2 : c2+1;
425 if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
426 if (nextcg)
427 *nextcg = get_next_cgroup_dir(linecmp, cg);
428 goto out;
429 }
430 answer = true;
431 goto out;
432 }
433
434out:
435 fclose(f);
436 free(line);
437 return answer;
438}
439
758ad80c 440/*
2c51f8dd
SH
441 * given /cgroup/freezer/a/b, return "freezer".
442 * the returned char* should NOT be freed.
758ad80c
SH
443 */
444static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
445{
446 const char *p1;
2c51f8dd 447 char *contr, *slash;
758ad80c
SH
448
449 if (strlen(path) < 9)
450 return NULL;
ac5d9d48
SH
451 if (*(path+7) != '/')
452 return NULL;
758ad80c 453 p1 = path+8;
2c51f8dd
SH
454 contr = strdupa(p1);
455 if (!contr)
456 return NULL;
457 slash = strstr(contr, "/");
758ad80c
SH
458 if (slash)
459 *slash = '\0';
460
461 /* verify that it is a subsystem */
462 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
463 int i;
2c51f8dd 464 if (!list)
758ad80c 465 return NULL;
758ad80c 466 for (i = 0; list[i]; i++) {
2c51f8dd
SH
467 if (strcmp(list[i], contr) == 0)
468 return list[i];
758ad80c 469 }
758ad80c
SH
470 return NULL;
471}
472
/*
 * Find the start of cgroup in /cgroup/controller/the/cgroup/path
 *
 * Returns a pointer into path just past the '/' that follows the
 * controller name, or NULL if path is shorter than 9 characters or has
 * no such '/'.  Note that the returned value may include files
 * (keynames) etc.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *sep;

	if (strlen(path) < 9)
		return NULL;

	sep = strchr(path + 8, '/');
	return sep ? sep + 1 : NULL;
}
488
/*
 * Report whether f names a child cgroup of dir under controller contr,
 * according to cgm_list_children().  A leading '/' on f is skipped.
 * Returns false for a NULL f or when the child list cannot be fetched.
 */
static bool is_child_cgroup(const char *contr, const char *dir, const char *f)
{
	char **list;
	char **entry;
	bool found = false;

	if (f == NULL)
		return false;
	if (*f == '/')
		f++;

	if (!cgm_list_children(contr, dir, &list))
		return false;

	/* compare and release each entry in a single pass */
	for (entry = list; *entry; entry++) {
		if (!found && strcmp(*entry, f) == 0)
			found = true;
		free(*entry);
	}
	free(list);

	return found;
}
515
516static struct cgm_keys *get_cgroup_key(const char *contr, const char *dir, const char *f)
517{
2c51f8dd
SH
518 struct cgm_keys **list = NULL;
519 struct cgm_keys *k = NULL;
758ad80c
SH
520 int i;
521
522 if (!f)
523 return NULL;
524 if (*f == '/')
525 f++;
526 if (!cgm_list_keys(contr, dir, &list))
527 return NULL;
528 for (i = 0; list[i]; i++) {
529 if (strcmp(list[i]->name, f) == 0) {
2c51f8dd
SH
530 int j;
531 // free all the keys we are not returning
532 k = list[i];
533 for (j = 0; list[j]; j++) {
534 if (i != j)
103f104c 535 free_key(list[j]);
2c51f8dd
SH
536 }
537 free(list);
758ad80c
SH
538 return k;
539 }
540 }
541
2c51f8dd 542 free_keys(list);
758ad80c
SH
543 return NULL;
544}
545
2c51f8dd
SH
/*
 * Split cg at its last '/'.
 *
 * *dir receives a heap copy of cg cut just before the last '/'; it
 * should be freed by the caller.  *file points into cg at that last '/'
 * and must not be freed.  If cg has no '/', *dir is a full copy of cg
 * and *file is NULL.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **file)
{
	const char *last = strrchr(cg, '/');
	char *copy;

	do {
		copy = strdup(cg);
	} while (copy == NULL);

	*dir = copy;
	*file = (char *)last;

	if (last != NULL)
		copy[last - cg] = '\0';
}
564
565/*
2ad6d2bd 566 * FUSE ops for /cgroup
758ad80c 567 */
2ad6d2bd 568
758ad80c
SH
569static int cg_getattr(const char *path, struct stat *sb)
570{
571 struct timespec now;
572 struct fuse_context *fc = fuse_get_context();
2c51f8dd 573 char * cgdir = NULL;
758ad80c 574 char *fpath = NULL, *path1, *path2;
2c51f8dd 575 struct cgm_keys *k = NULL;
758ad80c 576 const char *cgroup;
2c51f8dd
SH
577 const char *controller = NULL;
578 int ret = -ENOENT;
758ad80c
SH
579
580
581 if (!fc)
582 return -EIO;
583
584 memset(sb, 0, sizeof(struct stat));
585
586 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
587 return -EINVAL;
588
589 sb->st_uid = sb->st_gid = 0;
590 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
591 sb->st_size = 0;
592
593 if (strcmp(path, "/cgroup") == 0) {
594 sb->st_mode = S_IFDIR | 00755;
595 sb->st_nlink = 2;
596 return 0;
597 }
598
599 controller = pick_controller_from_path(fc, path);
600 if (!controller)
601 return -EIO;
758ad80c
SH
602 cgroup = find_cgroup_in_path(path);
603 if (!cgroup) {
604 /* this is just /cgroup/controller, return it as a dir */
605 sb->st_mode = S_IFDIR | 00755;
606 sb->st_nlink = 2;
607 return 0;
608 }
341b21ad 609
758ad80c
SH
610 get_cgdir_and_path(cgroup, &cgdir, &fpath);
611
612 if (!fpath) {
613 path1 = "/";
614 path2 = cgdir;
615 } else {
616 path1 = cgdir;
617 path2 = fpath;
618 }
619
758ad80c
SH
620 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
621 * Then check that caller's cgroup is under path if fpath is a child
622 * cgroup, or cgdir if fpath is a file */
623
624 if (is_child_cgroup(controller, path1, path2)) {
f9a05025
SH
625 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
626 /* this is just /cgroup/controller, return it as a dir */
627 sb->st_mode = S_IFDIR | 00555;
628 sb->st_nlink = 2;
2c51f8dd
SH
629 ret = 0;
630 goto out;
631 }
632 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
633 ret = -EACCES;
634 goto out;
f9a05025 635 }
758ad80c 636
053a659d
SH
637 // get uid, gid, from '/tasks' file and make up a mode
638 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
639 sb->st_mode = S_IFDIR | 00755;
640 k = get_cgroup_key(controller, cgroup, "tasks");
641 if (!k) {
053a659d
SH
642 sb->st_uid = sb->st_gid = 0;
643 } else {
053a659d
SH
644 sb->st_uid = k->uid;
645 sb->st_gid = k->gid;
646 }
2c51f8dd 647 free_key(k);
758ad80c 648 sb->st_nlink = 2;
2c51f8dd
SH
649 ret = 0;
650 goto out;
758ad80c
SH
651 }
652
653 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
758ad80c 654 sb->st_mode = S_IFREG | k->mode;
053a659d 655 sb->st_nlink = 1;
758ad80c
SH
656 sb->st_uid = k->uid;
657 sb->st_gid = k->gid;
7253e0a4 658 sb->st_size = 0;
2c51f8dd
SH
659 free_key(k);
660 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL))
661 return -ENOENT;
662 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY))
663 return -EACCES;
664
665 ret = 0;
758ad80c
SH
666 }
667
2c51f8dd
SH
668out:
669 free(cgdir);
670 return ret;
758ad80c 671}
2183082c 672
758ad80c 673static int cg_opendir(const char *path, struct fuse_file_info *fi)
2183082c 674{
7f163b71 675 struct fuse_context *fc = fuse_get_context();
7f163b71 676 const char *cgroup;
c688e1b3 677 struct file_info *dir_info;
2c51f8dd 678 char *controller = NULL;
7f163b71
SH
679
680 if (!fc)
681 return -EIO;
682
c688e1b3
SH
683 if (strcmp(path, "/cgroup") == 0) {
684 cgroup = NULL;
685 controller = NULL;
686 } else {
687 // return list of keys for the controller, and list of child cgroups
688 controller = pick_controller_from_path(fc, path);
689 if (!controller)
690 return -EIO;
7f163b71 691
c688e1b3
SH
692 cgroup = find_cgroup_in_path(path);
693 if (!cgroup) {
694 /* this is just /cgroup/controller, return its contents */
695 cgroup = "/";
696 }
7f163b71
SH
697 }
698
2c51f8dd 699 if (cgroup && !fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
7f163b71 700 return -EACCES;
2c51f8dd 701 }
c688e1b3
SH
702
703 /* we'll free this at cg_releasedir */
2c51f8dd
SH
704 dir_info = malloc(sizeof(*dir_info));
705 if (!dir_info)
706 return -ENOMEM;
bae07053
SH
707 dir_info->controller = must_copy_string(dir_info, controller);
708 dir_info->cgroup = must_copy_string(dir_info, cgroup);
443d13f5 709 dir_info->type = LXC_TYPE_CGDIR;
c688e1b3 710 dir_info->buf = NULL;
8f6e8f5e 711 dir_info->file = NULL;
c688e1b3
SH
712 dir_info->buflen = 0;
713
714 fi->fh = (unsigned long)dir_info;
758ad80c
SH
715 return 0;
716}
717
758ad80c
SH
718static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
719 struct fuse_file_info *fi)
720{
c688e1b3 721 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
722 struct cgm_keys **list = NULL;
723 int i, ret;
724 char *nextcg = NULL;
758ad80c 725 struct fuse_context *fc = fuse_get_context();
2c51f8dd 726 char **clist = NULL;
758ad80c 727
443d13f5 728 if (d->type != LXC_TYPE_CGDIR) {
b845ad01
SH
729 fprintf(stderr, "Internal error: file cache info used in readdir\n");
730 return -EIO;
731 }
c688e1b3
SH
732 if (!d->cgroup && !d->controller) {
733 // ls /var/lib/lxcfs/cgroup - just show list of controllers
758ad80c
SH
734 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
735 int i;
736
737 if (!list)
738 return -EIO;
7f163b71 739
758ad80c
SH
740 for (i = 0; list[i]; i++) {
741 if (filler(buf, list[i], NULL, 0) != 0) {
742 return -EIO;
743 }
744 }
745 return 0;
746 }
747
2c51f8dd 748 if (!cgm_list_keys(d->controller, d->cgroup, &list)) {
3db25a35 749 // not a valid cgroup
2c51f8dd
SH
750 ret = -EINVAL;
751 goto out;
752 }
3db25a35 753
c688e1b3 754 if (!caller_is_in_ancestor(fc->pid, d->controller, d->cgroup, &nextcg)) {
3db25a35
SH
755 if (nextcg) {
756 int ret;
757 ret = filler(buf, nextcg, NULL, 0);
2c51f8dd
SH
758 free(nextcg);
759 if (ret != 0) {
760 ret = -EIO;
761 goto out;
762 }
3db25a35 763 }
2c51f8dd
SH
764 ret = 0;
765 goto out;
3db25a35
SH
766 }
767
758ad80c 768 for (i = 0; list[i]; i++) {
758ad80c 769 if (filler(buf, list[i]->name, NULL, 0) != 0) {
2c51f8dd
SH
770 ret = -EIO;
771 goto out;
758ad80c
SH
772 }
773 }
774
775 // now get the list of child cgroups
758ad80c 776
2c51f8dd
SH
777 if (!cgm_list_children(d->controller, d->cgroup, &clist)) {
778 ret = 0;
779 goto out;
780 }
758ad80c 781 for (i = 0; clist[i]; i++) {
758ad80c 782 if (filler(buf, clist[i], NULL, 0) != 0) {
2c51f8dd
SH
783 ret = -EIO;
784 goto out;
758ad80c
SH
785 }
786 }
2c51f8dd
SH
787 ret = 0;
788
789out:
790 free_keys(list);
791 if (clist) {
792 for (i = 0; clist[i]; i++)
793 free(clist[i]);
794 free(clist);
795 }
796 return ret;
758ad80c
SH
797}
798
8f6e8f5e
SH
799static void do_release_file_info(struct file_info *f)
800{
2c51f8dd
SH
801 if (!f)
802 return;
803 free(f->controller);
804 free(f->cgroup);
805 free(f->file);
806 free(f->buf);
807 free(f);
8f6e8f5e
SH
808}
809
758ad80c
SH
810static int cg_releasedir(const char *path, struct fuse_file_info *fi)
811{
c688e1b3
SH
812 struct file_info *d = (struct file_info *)fi->fh;
813
8f6e8f5e 814 do_release_file_info(d);
758ad80c
SH
815 return 0;
816}
817
99978832
SH
818static int cg_open(const char *path, struct fuse_file_info *fi)
819{
99978832 820 const char *cgroup;
2c51f8dd
SH
821 char *fpath = NULL, *path1, *path2, * cgdir = NULL, *controller;
822 struct cgm_keys *k = NULL;
8f6e8f5e 823 struct file_info *file_info;
99978832 824 struct fuse_context *fc = fuse_get_context();
2c51f8dd 825 int ret;
99978832
SH
826
827 if (!fc)
828 return -EIO;
829
830 controller = pick_controller_from_path(fc, path);
831 if (!controller)
832 return -EIO;
833 cgroup = find_cgroup_in_path(path);
834 if (!cgroup)
835 return -EINVAL;
836
837 get_cgdir_and_path(cgroup, &cgdir, &fpath);
838 if (!fpath) {
839 path1 = "/";
840 path2 = cgdir;
841 } else {
842 path1 = cgdir;
843 path2 = fpath;
844 }
845
8f6e8f5e 846 k = get_cgroup_key(controller, path1, path2);
2c51f8dd
SH
847 if (!k) {
848 ret = -EINVAL;
849 goto out;
850 }
851 free_key(k);
99978832 852
2c51f8dd 853 if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
8f6e8f5e 854 // should never get here
2c51f8dd
SH
855 ret = -EACCES;
856 goto out;
857 }
99978832 858
8f6e8f5e 859 /* we'll free this at cg_release */
2c51f8dd
SH
860 file_info = malloc(sizeof(*file_info));
861 if (!file_info) {
862 ret = -ENOMEM;
863 goto out;
864 }
bae07053
SH
865 file_info->controller = must_copy_string(file_info, controller);
866 file_info->cgroup = must_copy_string(file_info, path1);
867 file_info->file = must_copy_string(file_info, path2);
443d13f5 868 file_info->type = LXC_TYPE_CGFILE;
8f6e8f5e
SH
869 file_info->buf = NULL;
870 file_info->buflen = 0;
871
872 fi->fh = (unsigned long)file_info;
2c51f8dd
SH
873 ret = 0;
874
875out:
876 free(cgdir);
877 return ret;
8f6e8f5e
SH
878}
879
880static int cg_release(const char *path, struct fuse_file_info *fi)
881{
882 struct file_info *f = (struct file_info *)fi->fh;
883
884 do_release_file_info(f);
885 return 0;
99978832
SH
886}
887
a05660a6
SH
/*
 * Receive up to len bytes from sockfd into buf, waiting at most two
 * seconds for data to arrive.  Returns what recv() returns, or -1 if
 * select() fails or times out.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	struct timeval timeout = { .tv_sec = 2, .tv_usec = 0 };
	fd_set readable;

	FD_ZERO(&readable);
	FD_SET(sockfd, &readable);

	if (select(sockfd + 1, &readable, NULL, NULL, &timeout) > 0)
		return recv(sockfd, buf, len, MSG_DONTWAIT);
	return -1;
}
902
01e71852
SH
#define SEND_CREDS_OK 0
#define SEND_CREDS_NOTSK 1
#define SEND_CREDS_FAIL 2
/*
 * Send the one-byte payload v over sock with *cred attached as an
 * SCM_CREDENTIALS control message.
 *
 * When pingfirst is set, one byte is first read from the peer (via
 * msgrecv(), so waiting at most two seconds) before anything is sent.
 *
 * Returns SEND_CREDS_OK on success, SEND_CREDS_NOTSK if sendmsg() failed
 * with ESRCH, and SEND_CREDS_FAIL on any other failure.
 */
static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
{
	struct msghdr msg = { 0 };
	struct iovec iov;
	struct cmsghdr *cmsg;
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1];

	if (pingfirst) {
		if (msgrecv(sock, buf, 1) != 1) {
			fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
				  __func__);
			return SEND_CREDS_FAIL;
		}
	}

	/* do not hand uninitialised padding bytes to sendmsg() */
	memset(cmsgbuf, 0, sizeof(cmsgbuf));
	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_CREDENTIALS;
	memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));

	msg.msg_name = NULL;
	msg.msg_namelen = 0;

	buf[0] = v;
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (sendmsg(sock, &msg, 0) < 0) {
		/* fprintf()/strerror() may change errno, so keep a copy */
		int saved_errno = errno;

		fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
			  strerror(saved_errno));
		if (saved_errno == ESRCH)
			return SEND_CREDS_NOTSK;
		return SEND_CREDS_FAIL;
	}

	return SEND_CREDS_OK;
}
951
/*
 * Receive one byte plus SCM_CREDENTIALS from sock.
 *
 * SO_PASSCRED is enabled on sock and a '1' byte is written to the peer
 * first.  Then up to two seconds are spent waiting for a message.
 *
 * On success *v holds the received byte and true is returned; *cred
 * holds the credentials if the message carried a matching control
 * message, otherwise its fields stay -1.  On failure false is returned
 * with *v == '1' and all *cred fields -1.
 */
static bool recv_creds(int sock, struct ucred *cred, char *v)
{
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1] = { '1' };
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = { 0 };
	struct timeval timeout = { .tv_sec = 2, .tv_usec = 0 };
	struct cmsghdr *cmsg;
	fd_set readable;
	int enable = 1;

	*v = '1';
	cred->pid = -1;
	cred->uid = -1;
	cred->gid = -1;

	if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &enable, sizeof(enable)) == -1) {
		fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
		return false;
	}

	/* ping the peer */
	if (write(sock, buf, 1) != 1) {
		fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
		return false;
	}

	/* msg_name / msg_namelen stay NULL / 0 from the initialiser */
	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	FD_ZERO(&readable);
	FD_SET(sock, &readable);
	if (select(sock + 1, &readable, NULL, NULL, &timeout) <= 0) {
		fprintf(stderr, "Failed to select for scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	if (recvmsg(sock, &msg, MSG_DONTWAIT) < 0) {
		fprintf(stderr, "Failed to receive scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg != NULL &&
	    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
	    cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_CREDENTIALS)
		memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));

	*v = buf[0];
	return true;
}
1017
1018
/*
 * pid_to_ns - reads pids from a ucred over a socket, then writes the
 * int value back over the socket.  This shifts the pid from the
 * sender's pidns into tpid's pidns.
 *
 * Never returns: the process exits 0 when a '1' byte is received or
 * receiving fails, and 1 if a reply cannot be written.  tpid is unused.
 */
static void pid_to_ns(int sock, pid_t tpid)
{
	struct ucred cred;
	char v = '0';

	for (;;) {
		if (!recv_creds(sock, &cred, &v))
			break;
		if (v == '1')
			_exit(0);
		if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
			_exit(1);
	}
	_exit(0);
}
1037
/*
 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
 * in your old pidns.  Only children which you fork will be in the target
 * pidns.  So the pid_to_ns_wrapper does the setns, then forks a child to
 * actually convert pids.
 *
 * The child acknowledges its start by writing '1' to a pipe.  If no ack
 * arrives within one second the child is killed and a new one is forked.
 *
 * Never returns: exits 0 if the converting child exited successfully,
 * 1 otherwise or on any setup failure.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	struct timeval tv;
	fd_set s;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

loop:
	cpid = fork();
	if (cpid < 0)
		_exit(1);

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		/* does not return */
		pid_to_ns(sock, tpid);
	}
	// give the child 1 second to be done forking and
	// write its ack
	FD_ZERO(&s);
	FD_SET(cpipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
	if (ret <= 0)
		goto again;
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1') {
		goto again;
	}

	/* wait_for_pid() returns 0 on success, so non-zero means failure */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);

again:
	kill(cpid, SIGKILL);
	wait_for_pid(cpid);
	goto loop;
}
1104
1105/*
1106 * To read cgroup files with a particular pid, we will setns into the child
1107 * pidns, open a pipe, fork a child - which will be the first to really be in
1108 * the child ns - which does the cgm_get_value and writes the data to the pipe.
1109 */
1110static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
1111{
1112 int sock[2] = {-1, -1};
2c51f8dd 1113 char *tmpdata = NULL;
a05660a6
SH
1114 int ret;
1115 pid_t qpid, cpid = -1;
1116 bool answer = false;
1117 char v = '0';
1118 struct ucred cred;
1119 struct timeval tv;
2c51f8dd 1120 size_t sz = 0, asz = 0;
a05660a6
SH
1121 fd_set s;
1122
1123 if (!cgm_get_value(contrl, cg, file, &tmpdata))
1124 return false;
1125
1126 /*
1127 * Now we read the pids from returned data one by one, pass
1128 * them into a child in the target namespace, read back the
1129 * translated pids, and put them into our to-return data
1130 */
1131
1132 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1133 perror("socketpair");
2c51f8dd
SH
1134 free(tmpdata);
1135 return false;
a05660a6
SH
1136 }
1137
1138 cpid = fork();
1139 if (cpid == -1)
1140 goto out;
1141
1142 if (!cpid) // child
4775fba1 1143 pid_to_ns_wrapper(sock[1], tpid);
a05660a6
SH
1144
1145 char *ptr = tmpdata;
1146 cred.uid = 0;
1147 cred.gid = 0;
1148 while (sscanf(ptr, "%d\n", &qpid) == 1) {
1149 cred.pid = qpid;
01e71852
SH
1150 ret = send_creds(sock[0], &cred, v, true);
1151
1152 if (ret == SEND_CREDS_NOTSK)
1153 goto next;
1154 if (ret == SEND_CREDS_FAIL)
a05660a6
SH
1155 goto out;
1156
1157 // read converted results
1158 FD_ZERO(&s);
1159 FD_SET(sock[0], &s);
6ee867dc 1160 tv.tv_sec = 2;
a05660a6
SH
1161 tv.tv_usec = 0;
1162 ret = select(sock[0]+1, &s, NULL, NULL, &tv);
1163 if (ret <= 0) {
6ee867dc
SH
1164 fprintf(stderr, "%s: select error waiting for pid from child: %s\n",
1165 __func__, strerror(errno));
a05660a6
SH
1166 goto out;
1167 }
1168 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1169 fprintf(stderr, "%s: error reading pid from child: %s\n",
1170 __func__, strerror(errno));
a05660a6
SH
1171 goto out;
1172 }
2c51f8dd 1173 must_strcat_pid(d, &sz, &asz, qpid);
01e71852 1174next:
a05660a6
SH
1175 ptr = strchr(ptr, '\n');
1176 if (!ptr)
1177 break;
1178 ptr++;
1179 }
1180
1181 cred.pid = getpid();
1182 v = '1';
01e71852 1183 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
a05660a6 1184 // failed to ask child to exit
6ee867dc
SH
1185 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
1186 __func__, strerror(errno));
a05660a6
SH
1187 goto out;
1188 }
1189
1190 answer = true;
1191
1192out:
2c51f8dd 1193 free(tmpdata);
a05660a6
SH
1194 if (cpid != -1)
1195 wait_for_pid(cpid);
1196 if (sock[0] != -1) {
1197 close(sock[0]);
1198 close(sock[1]);
1199 }
1200 return answer;
1201}
1202
99978832
SH
1203static int cg_read(const char *path, char *buf, size_t size, off_t offset,
1204 struct fuse_file_info *fi)
1205{
99978832 1206 struct fuse_context *fc = fuse_get_context();
8f6e8f5e 1207 struct file_info *f = (struct file_info *)fi->fh;
2c51f8dd
SH
1208 struct cgm_keys *k = NULL;
1209 char *data = NULL;
1210 int ret, s;
1211 bool r;
99978832 1212
443d13f5 1213 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1214 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
1215 return -EIO;
1216 }
1217
99978832 1218 if (offset)
7253e0a4 1219 return 0;
99978832
SH
1220
1221 if (!fc)
1222 return -EIO;
1223
8f6e8f5e 1224 if (!f->controller)
99978832
SH
1225 return -EINVAL;
1226
2c51f8dd
SH
1227 if ((k = get_cgroup_key(f->controller, f->cgroup, f->file)) == NULL) {
1228 return -EINVAL;
1229 }
1230 free_key(k);
99978832 1231
99978832 1232
2c51f8dd
SH
1233 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { // should never get here
1234 ret = -EACCES;
1235 goto out;
1236 }
a05660a6 1237
2c51f8dd
SH
1238 if (strcmp(f->file, "tasks") == 0 ||
1239 strcmp(f->file, "/tasks") == 0 ||
1240 strcmp(f->file, "/cgroup.procs") == 0 ||
1241 strcmp(f->file, "cgroup.procs") == 0)
1242 // special case - we have to translate the pids
1243 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
1244 else
1245 r = cgm_get_value(f->controller, f->cgroup, f->file, &data);
99978832 1246
2c51f8dd
SH
1247 if (!r) {
1248 ret = -EINVAL;
1249 goto out;
1250 }
99978832 1251
2c51f8dd
SH
1252 if (!data) {
1253 ret = 0;
1254 goto out;
99978832 1255 }
2c51f8dd
SH
1256 s = strlen(data);
1257 if (s > size)
1258 s = size;
1259 memcpy(buf, data, s);
1260 if (s > 0 && s < size && data[s-1] != '\n')
1261 buf[s++] = '\n';
99978832 1262
2c51f8dd
SH
1263 ret = s;
1264
1265out:
1266 free(data);
1267 return ret;
99978832
SH
1268}
1269
4775fba1
SH
1270static void pid_from_ns(int sock, pid_t tpid)
1271{
1272 pid_t vpid;
1273 struct ucred cred;
1274 char v;
6ee867dc
SH
1275 struct timeval tv;
1276 fd_set s;
1277 int ret;
4775fba1
SH
1278
1279 cred.uid = 0;
1280 cred.gid = 0;
6ee867dc
SH
1281 while (1) {
1282 FD_ZERO(&s);
1283 FD_SET(sock, &s);
1284 tv.tv_sec = 2;
1285 tv.tv_usec = 0;
1286 ret = select(sock+1, &s, NULL, NULL, &tv);
ea56f722
SH
1287 if (ret <= 0) {
1288 fprintf(stderr, "%s: bad select before read from parent: %s\n",
6ee867dc 1289 __func__, strerror(errno));
67bd113f 1290 _exit(1);
6ee867dc
SH
1291 }
1292 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1293 fprintf(stderr, "%s: bad read from parent: %s\n",
1294 __func__, strerror(errno));
67bd113f 1295 _exit(1);
6ee867dc 1296 }
4775fba1 1297 if (vpid == -1) // done
01e71852 1298 break;
4775fba1
SH
1299 v = '0';
1300 cred.pid = vpid;
01e71852 1301 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
4775fba1
SH
1302 v = '1';
1303 cred.pid = getpid();
01e71852 1304 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
67bd113f 1305 _exit(1);
4775fba1
SH
1306 }
1307 }
67bd113f 1308 _exit(0);
4775fba1
SH
1309}
1310
/*
 * Enter the pid namespace of tpid and fork a child that runs
 * pid_from_ns() on sock.
 *
 * The child acknowledges a successful fork by writing '1' to a pipe.  If
 * the acknowledgement does not arrive within one second the child is
 * killed and a new one is forked.
 *
 * Never returns: exits 1 on setup failure or when wait_for_pid() reports
 * false for the child, 0 otherwise.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	char nspath[100];
	int ackpipe[2];
	int nsfd, n;

	n = snprintf(nspath, sizeof(nspath), "/proc/%d/ns/pid", tpid);
	if (n < 0 || n >= sizeof(nspath))
		_exit(1);

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0)
		_exit(1);
	if (setns(nsfd, 0) < 0)
		_exit(1);
	close(nsfd);

	if (pipe(ackpipe) < 0)
		_exit(1);

	for (;;) {
		struct timeval timeout;
		fd_set rfds;
		pid_t child;
		char ack;

		child = fork();
		if (child < 0)
			_exit(1);

		if (child == 0) {
			char b = '1';

			close(ackpipe[0]);
			if (write(ackpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(ackpipe[1]);
			pid_from_ns(sock, tpid);
		}

		/* give the child 1 second to finish forking and write its ack */
		FD_ZERO(&rfds);
		FD_SET(ackpipe[0], &rfds);
		timeout.tv_sec = 1;
		timeout.tv_usec = 0;

		if (select(ackpipe[0] + 1, &rfds, NULL, NULL, &timeout) > 0 &&
		    read(ackpipe[0], &ack, 1) == sizeof(char) &&
		    ack == '1') {
			if (!wait_for_pid(child))
				_exit(1);
			_exit(0);
		}

		/* no valid ack: get rid of this child and try again */
		kill(child, SIGKILL);
		wait_for_pid(child);
	}
}
1373
1374static bool do_write_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, const char *buf)
1375{
1376 int sock[2] = {-1, -1};
1377 pid_t qpid, cpid = -1;
1378 bool answer = false, fail = false;
1379
1380 /*
1381 * write the pids to a socket, have helper in writer's pidns
1382 * call movepid for us
1383 */
1384 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1385 perror("socketpair");
1386 exit(1);
1387 }
1388
1389 cpid = fork();
1390 if (cpid == -1)
1391 goto out;
1392
1393 if (!cpid) // child
1394 pid_from_ns_wrapper(sock[1], tpid);
1395
1396 const char *ptr = buf;
1397 while (sscanf(ptr, "%d", &qpid) == 1) {
1398 struct ucred cred;
1399 char v;
1400
1401 if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1402 fprintf(stderr, "%s: error writing pid to child: %s\n",
1403 __func__, strerror(errno));
4775fba1
SH
1404 goto out;
1405 }
1406
01e71852
SH
1407 if (recv_creds(sock[0], &cred, &v)) {
1408 if (v == '0') {
1409 if (!cgm_move_pid(contrl, cg, cred.pid))
1410 fail = true;
1411 }
4775fba1
SH
1412 }
1413
1414 ptr = strchr(ptr, '\n');
1415 if (!ptr)
1416 break;
1417 ptr++;
1418 }
1419
1420 /* All good, write the value */
1421 qpid = -1;
1422 if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
1420baf8 1423 fprintf(stderr, "Warning: failed to ask child to exit\n");
4775fba1
SH
1424
1425 if (!fail)
1426 answer = true;
1427
1428out:
1429 if (cpid != -1)
1430 wait_for_pid(cpid);
1431 if (sock[0] != -1) {
1432 close(sock[0]);
1433 close(sock[1]);
1434 }
1435 return answer;
1436}
1437
2ad6d2bd
SH
1438int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1439 struct fuse_file_info *fi)
1440{
2ad6d2bd 1441 struct fuse_context *fc = fuse_get_context();
2c51f8dd
SH
1442 char *localbuf = NULL;
1443 struct cgm_keys *k = NULL;
8f6e8f5e 1444 struct file_info *f = (struct file_info *)fi->fh;
2c51f8dd 1445 bool r;
2ad6d2bd 1446
443d13f5 1447 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1448 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
1449 return -EIO;
1450 }
1451
2ad6d2bd 1452 if (offset)
7253e0a4 1453 return 0;
2ad6d2bd
SH
1454
1455 if (!fc)
1456 return -EIO;
1457
2c51f8dd 1458 localbuf = alloca(size+1);
47cbf0e5
SH
1459 localbuf[size] = '\0';
1460 memcpy(localbuf, buf, size);
2ad6d2bd 1461
2c51f8dd
SH
1462 if ((k = get_cgroup_key(f->controller, f->cgroup, f->file)) == NULL) {
1463 size = -EINVAL;
1464 goto out;
1465 }
2ad6d2bd 1466
2c51f8dd
SH
1467 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) {
1468 size = -EACCES;
1469 goto out;
1470 }
4775fba1 1471
2c51f8dd
SH
1472 if (strcmp(f->file, "tasks") == 0 ||
1473 strcmp(f->file, "/tasks") == 0 ||
1474 strcmp(f->file, "/cgroup.procs") == 0 ||
1475 strcmp(f->file, "cgroup.procs") == 0)
1476 // special case - we have to translate the pids
1477 r = do_write_pids(fc->pid, f->controller, f->cgroup, f->file, localbuf);
1478 else
1479 r = cgm_set_value(f->controller, f->cgroup, f->file, localbuf);
2ad6d2bd 1480
2c51f8dd
SH
1481 if (!r)
1482 size = -EINVAL;
2ad6d2bd 1483
2c51f8dd
SH
1484out:
1485 free_key(k);
1486 return size;
2ad6d2bd
SH
1487}
1488
341b21ad
SH
1489int cg_chown(const char *path, uid_t uid, gid_t gid)
1490{
1491 struct fuse_context *fc = fuse_get_context();
2c51f8dd
SH
1492 char *cgdir = NULL, *fpath = NULL, *path1, *path2, *controller;
1493 struct cgm_keys *k = NULL;
341b21ad 1494 const char *cgroup;
2c51f8dd 1495 int ret;
341b21ad
SH
1496
1497 if (!fc)
1498 return -EIO;
1499
1500 if (strcmp(path, "/cgroup") == 0)
1501 return -EINVAL;
1502
1503 controller = pick_controller_from_path(fc, path);
1504 if (!controller)
f9a05025 1505 return -EINVAL;
341b21ad
SH
1506 cgroup = find_cgroup_in_path(path);
1507 if (!cgroup)
1508 /* this is just /cgroup/controller */
1509 return -EINVAL;
1510
1511 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1512
1513 if (!fpath) {
1514 path1 = "/";
1515 path2 = cgdir;
1516 } else {
1517 path1 = cgdir;
1518 path2 = fpath;
1519 }
1520
1521 if (is_child_cgroup(controller, path1, path2)) {
1522 // get uid, gid, from '/tasks' file and make up a mode
1523 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1524 k = get_cgroup_key(controller, cgroup, "tasks");
1525
1526 } else
1527 k = get_cgroup_key(controller, path1, path2);
1528
2c51f8dd
SH
1529 if (!k) {
1530 ret = -EINVAL;
1531 goto out;
1532 }
341b21ad
SH
1533
1534 /*
1535 * This being a fuse request, the uid and gid must be valid
1536 * in the caller's namespace. So we can just check to make
1537 * sure that the caller is root in his uid, and privileged
1538 * over the file's current owner.
1539 */
2c51f8dd
SH
1540 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) {
1541 ret = -EACCES;
1542 goto out;
1543 }
341b21ad 1544
2c51f8dd
SH
1545 if (!cgm_chown_file(controller, cgroup, uid, gid)) {
1546 ret = -EINVAL;
1547 goto out;
1548 }
1549
1550 ret = 0;
1551
1552out:
1553 free_key(k);
1554 free(cgdir);
1555
1556 return ret;
341b21ad 1557}
2ad6d2bd 1558
fd2e4e03
SH
1559int cg_chmod(const char *path, mode_t mode)
1560{
0a1bb5ea 1561 struct fuse_context *fc = fuse_get_context();
2c51f8dd
SH
1562 char * cgdir = NULL, *fpath = NULL, *path1, *path2, *controller;
1563 struct cgm_keys *k = NULL;
0a1bb5ea 1564 const char *cgroup;
2c51f8dd 1565 int ret;
0a1bb5ea
SH
1566
1567 if (!fc)
1568 return -EIO;
1569
1570 if (strcmp(path, "/cgroup") == 0)
1571 return -EINVAL;
1572
1573 controller = pick_controller_from_path(fc, path);
1574 if (!controller)
f9a05025 1575 return -EINVAL;
0a1bb5ea
SH
1576 cgroup = find_cgroup_in_path(path);
1577 if (!cgroup)
1578 /* this is just /cgroup/controller */
1579 return -EINVAL;
1580
1581 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1582
1583 if (!fpath) {
1584 path1 = "/";
1585 path2 = cgdir;
1586 } else {
1587 path1 = cgdir;
1588 path2 = fpath;
1589 }
1590
1591 if (is_child_cgroup(controller, path1, path2)) {
1592 // get uid, gid, from '/tasks' file and make up a mode
1593 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1594 k = get_cgroup_key(controller, cgroup, "tasks");
1595
1596 } else
1597 k = get_cgroup_key(controller, path1, path2);
1598
2c51f8dd
SH
1599 if (!k) {
1600 ret = -EINVAL;
1601 goto out;
1602 }
0a1bb5ea
SH
1603
1604 /*
1605 * This being a fuse request, the uid and gid must be valid
1606 * in the caller's namespace. So we can just check to make
1607 * sure that the caller is root in his uid, and privileged
1608 * over the file's current owner.
1609 */
2c51f8dd
SH
1610 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
1611 ret = -EPERM;
1612 goto out;
1613 }
0a1bb5ea 1614
2c51f8dd
SH
1615 if (!cgm_chmod_file(controller, cgroup, mode)) {
1616 ret = -EINVAL;
1617 goto out;
1618 }
1619
1620 ret = 0;
1621out:
1622 free_key(k);
1623 free(cgdir);
1624 return ret;
fd2e4e03
SH
1625}
1626
ab54b798
SH
1627int cg_mkdir(const char *path, mode_t mode)
1628{
1629 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1630 char *fpath = NULL, *path1, *cgdir = NULL, *controller;
ab54b798 1631 const char *cgroup;
2c51f8dd 1632 int ret;
ab54b798 1633
ab54b798
SH
1634 if (!fc)
1635 return -EIO;
1636
1637
1638 controller = pick_controller_from_path(fc, path);
1639 if (!controller)
f9a05025 1640 return -EINVAL;
ab54b798
SH
1641
1642 cgroup = find_cgroup_in_path(path);
1643 if (!cgroup)
f9a05025 1644 return -EINVAL;
ab54b798
SH
1645
1646 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1647 if (!fpath)
1648 path1 = "/";
1649 else
1650 path1 = cgdir;
1651
2c51f8dd
SH
1652 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
1653 ret = -EACCES;
1654 goto out;
1655 }
1656 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
1657 ret = -EACCES;
1658 goto out;
1659 }
ab54b798 1660
2c51f8dd
SH
1661 if (fc->uid == 0 && fc->gid == 0) {
1662 if (!cgm_create(controller, cgroup)) {
1663 ret = -EINVAL;
1664 goto out;
1665 }
1666 } else {
1667 /*
40b8c791 1668 * exec a helper so as to get a clean dbus connection
2c51f8dd
SH
1669 * 17 for lxcfs_mkdir, and spaces and newline and \0. 50 for two ints.
1670 * 50 for two ints
1671 */
1672 size_t len = strlen(cgroup) + strlen(controller) + 17 + 50;
1673 char *cmd = alloca(len);
1674 ret = snprintf(cmd, len, "lxcfs_mkdir %d %d %s %s\n",
1675 fc->uid, fc->gid, controller, cgroup);
1676 if (ret < 0 || ret >= len) {
1677 ret = -EINVAL;
1678 goto out;
1679 }
1680 ret = system(cmd);
1681 if (ret != 0)
1682 goto out;
1683 }
ab54b798 1684
2c51f8dd 1685 ret = 0;
ab54b798 1686
2c51f8dd
SH
1687out:
1688 free(cgdir);
1689 return ret;
ab54b798
SH
1690}
1691
50d8d5b5
SH
1692static int cg_rmdir(const char *path)
1693{
1694 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1695 char *fpath = NULL, *cgdir = NULL, *controller;
50d8d5b5 1696 const char *cgroup;
2c51f8dd 1697 int ret;
50d8d5b5
SH
1698
1699 if (!fc)
1700 return -EIO;
1701
50d8d5b5
SH
1702 controller = pick_controller_from_path(fc, path);
1703 if (!controller)
f9a05025 1704 return -EINVAL;
50d8d5b5
SH
1705
1706 cgroup = find_cgroup_in_path(path);
1707 if (!cgroup)
f9a05025 1708 return -EINVAL;
50d8d5b5
SH
1709
1710 get_cgdir_and_path(cgroup, &cgdir, &fpath);
2c51f8dd
SH
1711 if (!fpath) {
1712 ret = -EINVAL;
1713 goto out;
1714 }
50d8d5b5 1715
2c51f8dd
SH
1716 fprintf(stderr, "rmdir: verifying access to %s:%s (req path %s)\n",
1717 controller, cgdir, path);
1718 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
1719 ret = -EACCES;
1720 goto out;
1721 }
1722 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
1723 ret = -EACCES;
1724 goto out;
1725 }
50d8d5b5 1726
2c51f8dd
SH
1727 if (!cgm_remove(controller, cgroup)) {
1728 ret = -EINVAL;
1729 goto out;
1730 }
50d8d5b5 1731
2c51f8dd
SH
1732 ret = 0;
1733
1734out:
1735 free(cgdir);
1736 return ret;
50d8d5b5
SH
1737}
1738
2dc17609
SH
/*
 * Return true if line begins with pref.  An empty pref matches anything.
 */
static bool startswith(const char *line, const char *pref)
{
	size_t preflen = strlen(pref);

	return strncmp(line, pref, preflen) == 0;
}
1745
/*
 * Find the first line of memstat that starts with "total_cache", parse
 * the number following that prefix and store it, divided by 1024, in *v.
 * *v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	char *cur = memstat;

	*v = 0;
	while (*cur != '\0') {
		char *nl;

		if (startswith(cur, "total_cache")) {
			sscanf(cur + (sizeof("total_cache") - 1), "%lu", v);
			*v /= 1024;
			return;
		}

		/* move on to the start of the next line, if there is one */
		nl = strchr(cur, '\n');
		if (nl == NULL)
			return;
		cur = nl + 1;
	}
}
1763
/*
 * Search str line by line for one starting with "<major>:<minor> <iotype>"
 * and store the number that follows that key in *v.
 * *v is 0 when no line matches.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32] = {0};
	size_t keylen;
	char *cur = str;

	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;
	while (*cur != '\0') {
		char *nl;

		if (startswith(cur, key)) {
			sscanf(cur + keylen, "%lu", v);
			return;
		}

		/* move on to the start of the next line, if there is one */
		nl = strchr(cur, '\n');
		if (nl == NULL)
			return;
		cur = nl + 1;
	}
}
1786
53b43826
SH
1787static int read_file(const char *path, char *buf, size_t size,
1788 struct file_info *d)
1789{
1790 size_t linelen = 0, total_len = 0, rv = 0;
1791 char *line = NULL;
1792 char *cache = d->buf;
1793 size_t cache_size = d->buflen;
1794 FILE *f = fopen(path, "r");
1795 if (!f)
1796 return 0;
1797
1798 while (getline(&line, &linelen, f) != -1) {
1799 size_t l = snprintf(cache, cache_size, "%s", line);
1800 if (l < 0) {
1801 perror("Error writing to cache");
1802 rv = 0;
1803 goto err;
1804 }
1805 if (l >= cache_size) {
1806 fprintf(stderr, "Internal error: truncated write to cache\n");
1807 rv = 0;
1808 goto err;
1809 }
1810 if (l < cache_size) {
1811 cache += l;
1812 cache_size -= l;
1813 total_len += l;
1814 } else {
1815 cache += cache_size;
1816 total_len += cache_size;
1817 cache_size = 0;
1818 break;
1819 }
1820 }
1821
1822 d->size = total_len;
1823 if (total_len > size ) total_len = size;
1824
1825 /* read from off 0 */
1826 memcpy(buf, d->buf, total_len);
1827 rv = total_len;
1828 err:
1829 fclose(f);
1830 free(line);
1831 return rv;
1832}
1833
758ad80c 1834/*
2ad6d2bd 1835 * FUSE ops for /proc
758ad80c 1836 */
758ad80c 1837
23ce2127
SH
1838static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1839 struct fuse_file_info *fi)
1840{
2dc17609 1841 struct fuse_context *fc = fuse_get_context();
97f1f27b 1842 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
1843 char *cg;
1844 char *memlimit_str = NULL, *memusage_str = NULL, *memstat_str = NULL;
2dc17609
SH
1845 unsigned long memlimit = 0, memusage = 0, cached = 0, hosttotal = 0;
1846 char *line = NULL;
e1068397 1847 size_t linelen = 0, total_len = 0, rv = 0;
97f1f27b
YY
1848 char *cache = d->buf;
1849 size_t cache_size = d->buflen;
2c51f8dd 1850 FILE *f = NULL;
2dc17609 1851
97f1f27b
YY
1852 if (offset){
1853 if (offset > d->size)
1854 return -EINVAL;
1855 int left = d->size - offset;
1856 total_len = left > size ? size: left;
1857 memcpy(buf, cache + offset, total_len);
1858 return total_len;
1859 }
2dc17609 1860
2c51f8dd 1861 cg = get_pid_cgroup(fc->pid, "memory");
2dc17609 1862 if (!cg)
53b43826 1863 return read_file("/proc/meminfo", buf, size, d);
2dc17609
SH
1864
1865 if (!cgm_get_value("memory", cg, "memory.limit_in_bytes", &memlimit_str))
2c51f8dd 1866 goto err;
2dc17609 1867 if (!cgm_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
2c51f8dd 1868 goto err;
2dc17609 1869 if (!cgm_get_value("memory", cg, "memory.stat", &memstat_str))
2c51f8dd 1870 goto err;
2dc17609
SH
1871 memlimit = strtoul(memlimit_str, NULL, 10);
1872 memusage = strtoul(memusage_str, NULL, 10);
1873 memlimit /= 1024;
1874 memusage /= 1024;
1875 get_mem_cached(memstat_str, &cached);
1876
1877 f = fopen("/proc/meminfo", "r");
1878 if (!f)
2c51f8dd 1879 goto err;
2dc17609
SH
1880
1881 while (getline(&line, &linelen, f) != -1) {
1882 size_t l;
1883 char *printme, lbuf[100];
1884
1885 memset(lbuf, 0, 100);
1886 if (startswith(line, "MemTotal:")) {
1887 sscanf(line+14, "%lu", &hosttotal);
1888 if (hosttotal < memlimit)
1889 memlimit = hosttotal;
1890 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
1891 printme = lbuf;
1892 } else if (startswith(line, "MemFree:")) {
1893 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
1894 printme = lbuf;
1895 } else if (startswith(line, "MemAvailable:")) {
1896 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
1897 printme = lbuf;
1898 } else if (startswith(line, "Buffers:")) {
1899 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
1900 printme = lbuf;
1901 } else if (startswith(line, "Cached:")) {
1902 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
1903 printme = lbuf;
1904 } else if (startswith(line, "SwapCached:")) {
1905 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
1906 printme = lbuf;
1907 } else
1908 printme = line;
97f1f27b
YY
1909
1910 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
1911 if (l < 0) {
1912 perror("Error writing to cache");
1913 rv = 0;
1914 goto err;
1915
1916 }
1917 if (l >= cache_size) {
1918 fprintf(stderr, "Internal error: truncated write to cache\n");
1919 rv = 0;
1920 goto err;
1921 }
1922
97f1f27b
YY
1923 cache += l;
1924 cache_size -= l;
2f919d9d 1925 total_len += l;
2dc17609
SH
1926 }
1927
97f1f27b
YY
1928 d->size = total_len;
1929 if (total_len > size ) total_len = size;
1930 memcpy(buf, d->buf, total_len);
1931
e1068397 1932 rv = total_len;
2c51f8dd
SH
1933err:
1934 if (f)
1935 fclose(f);
92c84dc4 1936 free(line);
2c51f8dd
SH
1937 free(cg);
1938 free(memlimit_str);
1939 free(memusage_str);
1940 free(memstat_str);
e1068397 1941 return rv;
23ce2127
SH
1942}
1943
1944/*
1945 * Read the cpuset.cpus for cg
2c51f8dd 1946 * Return the answer in a newly allocated string which must be freed
23ce2127
SH
1947 */
/*
 * Fetch "cpuset.cpus" of cgroup cg from the "cpuset" controller.
 * Returns the string filled in by cgm_get_value(), or NULL if the
 * lookup fails.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgm_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
1956
fa47bb52 1957bool cpu_in_cpuset(int cpu, const char *cpuset);
23ce2127 1958
aeb56147
SH
/*
 * Return true if line is a "processor : N" line and cpu N is reported
 * by cpu_in_cpuset() as part of cpuset.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;
	bool parsed = sscanf(line, "processor : %d", &cpu) == 1;

	return parsed && cpu_in_cpuset(cpu, cpuset);
}
1967
23ce2127
SH
1968/*
1969 * check whether this is a '^processor' line in /proc/cpuinfo
1970 */
/*
 * Return true if line has the form "processor : <number>".
 */
static bool is_processor_line(const char *line)
{
	int ignored;

	return sscanf(line, "processor : %d", &ignored) == 1;
}
1979
23ce2127
SH
1980static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
1981 struct fuse_file_info *fi)
1982{
1983 struct fuse_context *fc = fuse_get_context();
97f1f27b 1984 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
1985 char *cg;
1986 char *cpuset = NULL;
23ce2127 1987 char *line = NULL;
e1068397 1988 size_t linelen = 0, total_len = 0, rv = 0;
23ce2127
SH
1989 bool am_printing = false;
1990 int curcpu = -1;
97f1f27b
YY
1991 char *cache = d->buf;
1992 size_t cache_size = d->buflen;
2c51f8dd 1993 FILE *f = NULL;
23ce2127 1994
97f1f27b
YY
1995 if (offset){
1996 if (offset > d->size)
1997 return -EINVAL;
1998 int left = d->size - offset;
1999 total_len = left > size ? size: left;
2000 memcpy(buf, cache + offset, total_len);
2f919d9d 2001 return total_len;
97f1f27b 2002 }
23ce2127 2003
2c51f8dd 2004 cg = get_pid_cgroup(fc->pid, "cpuset");
23ce2127 2005 if (!cg)
53b43826 2006 return read_file("proc/cpuinfo", buf, size, d);
23ce2127
SH
2007
2008 cpuset = get_cpuset(cg);
2009 if (!cpuset)
2c51f8dd 2010 goto err;
23ce2127
SH
2011
2012 f = fopen("/proc/cpuinfo", "r");
2013 if (!f)
2c51f8dd 2014 goto err;
23ce2127
SH
2015
2016 while (getline(&line, &linelen, f) != -1) {
2017 size_t l;
2018 if (is_processor_line(line)) {
aeb56147 2019 am_printing = cpuline_in_cpuset(line, cpuset);
23ce2127
SH
2020 if (am_printing) {
2021 curcpu ++;
97f1f27b 2022 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
e1068397
MM
2023 if (l < 0) {
2024 perror("Error writing to cache");
2025 rv = 0;
2026 goto err;
2027 }
2028 if (l >= cache_size) {
2029 fprintf(stderr, "Internal error: truncated write to cache\n");
2030 rv = 0;
2031 goto err;
2032 }
97f1f27b
YY
2033 if (l < cache_size){
2034 cache += l;
2035 cache_size -= l;
2036 total_len += l;
2037 }else{
2038 cache += cache_size;
2039 total_len += cache_size;
2040 cache_size = 0;
2041 break;
2042 }
23ce2127
SH
2043 }
2044 continue;
2045 }
2046 if (am_printing) {
97f1f27b 2047 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2048 if (l < 0) {
2049 perror("Error writing to cache");
2050 rv = 0;
2051 goto err;
2052 }
2053 if (l >= cache_size) {
2054 fprintf(stderr, "Internal error: truncated write to cache\n");
2055 rv = 0;
2056 goto err;
2057 }
97f1f27b
YY
2058 if (l < cache_size) {
2059 cache += l;
2060 cache_size -= l;
2061 total_len += l;
2062 } else {
2063 cache += cache_size;
2064 total_len += cache_size;
2065 cache_size = 0;
2066 break;
2067 }
23ce2127
SH
2068 }
2069 }
2070
97f1f27b
YY
2071 d->size = total_len;
2072 if (total_len > size ) total_len = size;
2073
2074 /* read from off 0 */
2075 memcpy(buf, d->buf, total_len);
e1068397 2076 rv = total_len;
2c51f8dd
SH
2077err:
2078 if (f)
2079 fclose(f);
92c84dc4 2080 free(line);
2c51f8dd
SH
2081 free(cpuset);
2082 free(cg);
e1068397 2083 return rv;
23ce2127
SH
2084}
2085
2086static int proc_stat_read(char *buf, size_t size, off_t offset,
2087 struct fuse_file_info *fi)
2088{
aeb56147 2089 struct fuse_context *fc = fuse_get_context();
97f1f27b 2090 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2091 char *cg;
2092 char *cpuset = NULL;
aeb56147 2093 char *line = NULL;
e1068397 2094 size_t linelen = 0, total_len = 0, rv = 0;
2a0fde62 2095 int curcpu = -1; /* cpu numbering starts at 0 */
97f1f27b
YY
2096 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
2097 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
2098 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
2099#define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2100 char cpuall[CPUALL_MAX_SIZE];
2101 /* reserve for cpu all */
2102 char *cache = d->buf + CPUALL_MAX_SIZE;
2103 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
2c51f8dd 2104 FILE *f = NULL;
aeb56147 2105
97f1f27b
YY
2106 if (offset){
2107 if (offset > d->size)
2108 return -EINVAL;
2109 int left = d->size - offset;
2110 total_len = left > size ? size: left;
2111 memcpy(buf, d->buf + offset, total_len);
2f919d9d 2112 return total_len;
97f1f27b 2113 }
aeb56147 2114
2c51f8dd 2115 cg = get_pid_cgroup(fc->pid, "cpuset");
aeb56147 2116 if (!cg)
53b43826 2117 return read_file("/proc/stat", buf, size, d);
aeb56147
SH
2118
2119 cpuset = get_cpuset(cg);
2120 if (!cpuset)
2c51f8dd 2121 goto err;
aeb56147
SH
2122
2123 f = fopen("/proc/stat", "r");
2124 if (!f)
2c51f8dd 2125 goto err;
aeb56147 2126
97f1f27b
YY
2127 //skip first line
2128 if (getline(&line, &linelen, f) < 0) {
2129 fprintf(stderr, "proc_stat_read read first line failed\n");
2c51f8dd 2130 goto err;
97f1f27b
YY
2131 }
2132
aeb56147
SH
2133 while (getline(&line, &linelen, f) != -1) {
2134 size_t l;
2135 int cpu;
2a0fde62 2136 char cpu_char[10]; /* That's a lot of cores */
aeb56147
SH
2137 char *c;
2138
2a0fde62
CB
2139 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
2140 /* not a ^cpuN line containing a number N, just print it */
97f1f27b 2141 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2142 if (l < 0) {
2143 perror("Error writing to cache");
2144 rv = 0;
2145 goto err;
2146 }
2147 if (l >= cache_size) {
2148 fprintf(stderr, "Internal error: truncated write to cache\n");
2149 rv = 0;
2150 goto err;
2151 }
2152 if (l < cache_size) {
97f1f27b
YY
2153 cache += l;
2154 cache_size -= l;
2155 total_len += l;
2156 continue;
e1068397 2157 } else {
97f1f27b
YY
2158 //no more space, break it
2159 cache += cache_size;
2160 total_len += cache_size;
2161 cache_size = 0;
2162 break;
2163 }
aeb56147 2164 }
2a0fde62
CB
2165
2166 if (sscanf(cpu_char, "%d", &cpu) != 1)
2167 continue;
aeb56147
SH
2168 if (!cpu_in_cpuset(cpu, cpuset))
2169 continue;
2170 curcpu ++;
2171
2172 c = strchr(line, ' ');
2173 if (!c)
2174 continue;
25c5e8fb 2175 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
e1068397
MM
2176 if (l < 0) {
2177 perror("Error writing to cache");
2178 rv = 0;
2179 goto err;
2180
2181 }
2182 if (l >= cache_size) {
2183 fprintf(stderr, "Internal error: truncated write to cache\n");
2184 rv = 0;
2185 goto err;
2186 }
2187
97f1f27b
YY
2188 cache += l;
2189 cache_size -= l;
aeb56147 2190 total_len += l;
2f919d9d 2191
97f1f27b
YY
2192 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
2193 &softirq, &steal, &guest) != 9)
2194 continue;
2195 user_sum += user;
2196 nice_sum += nice;
2197 system_sum += system;
2198 idle_sum += idle;
2199 iowait_sum += iowait;
2200 irq_sum += irq;
2201 softirq_sum += softirq;
2202 steal_sum += steal;
2f919d9d 2203 guest_sum += guest;
97f1f27b
YY
2204 }
2205
2206 cache = d->buf;
2207
2f919d9d 2208 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
97f1f27b
YY
2209 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
2210 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
2211 memcpy(cache, cpuall, cpuall_len);
2f919d9d 2212 cache += cpuall_len;
2c51f8dd 2213 } else{
97f1f27b
YY
2214 /* shouldn't happen */
2215 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
2216 cpuall_len = 0;
2217 }
2218
2219 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
2220 total_len += cpuall_len;
2221 d->size = total_len;
2222 if (total_len > size ) total_len = size;
2223
2224 memcpy(buf, d->buf, total_len);
e1068397 2225 rv = total_len;
2c51f8dd
SH
2226
2227err:
2228 if (f)
2229 fclose(f);
92c84dc4 2230 free(line);
2c51f8dd
SH
2231 free(cpuset);
2232 free(cg);
e1068397 2233 return rv;
23ce2127
SH
2234}
2235
7bbf2246
SH
2236/*
2237 * How to guess what to present for uptime?
2238 * One thing we could do would be to take the date on the caller's
2239 * memory.usage_in_bytes file, which should equal the time of creation
2240 * of his cgroup. However, a task could be in a sub-cgroup of the
2241 * container. The same problem exists if we try to look at the ages
2242 * of processes in the caller's cgroup.
2243 *
2244 * So we'll fork a task that will enter the caller's pidns, mount a
2245 * fresh procfs, get the age of /proc/1, and pass that back over a pipe.
2246 *
2247 * For the second uptime #, we'll do as Stéphane had done, just copy
2248 * the number from /proc/uptime. Not sure how to best emulate 'idle'
2249 * time. Maybe someone can come up with a good algorithm and submit a
2250 * patch. Maybe something based on cpushare info?
2251 */
41bb9357
SH
2252
2253/* return age of the reaper for $pid, taken from ctime of its procdir */
2254static long int get_pid1_time(pid_t pid)
2255{
2256 char fnam[100];
ea56f722 2257 int fd, cpipe[2], ret;
41bb9357 2258 struct stat sb;
ea56f722
SH
2259 pid_t cpid;
2260 struct timeval tv;
2261 fd_set s;
2262 char v;
41bb9357
SH
2263
2264 if (unshare(CLONE_NEWNS))
2265 return 0;
2266
5ca64c2a
SG
2267 if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
2268 perror("rslave mount failed");
2269 return 0;
2270 }
2271
c0adec85
SH
2272 ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", pid);
2273 if (ret < 0 || ret >= sizeof(fnam))
2274 return 0;
2275
41bb9357
SH
2276 fd = open(fnam, O_RDONLY);
2277 if (fd < 0) {
2278 perror("get_pid1_time open of ns/pid");
2279 return 0;
2280 }
2281 if (setns(fd, 0)) {
2282 perror("get_pid1_time setns 1");
2283 close(fd);
2284 return 0;
2285 }
2286 close(fd);
41bb9357 2287
ea56f722
SH
2288 if (pipe(cpipe) < 0)
2289 exit(1);
41bb9357 2290
ea56f722
SH
2291loop:
2292 cpid = fork();
2293 if (cpid < 0)
41bb9357 2294 return 0;
ea56f722
SH
2295
2296 if (!cpid) {
2297 char b = '1';
2298 close(cpipe[0]);
2299 if (write(cpipe[1], &b, sizeof(char)) < 0) {
2300 fprintf(stderr, "%s (child): erorr on write: %s\n",
2301 __func__, strerror(errno));
2302 }
2303 close(cpipe[1]);
2304 umount2("/proc", MNT_DETACH);
2305 if (mount("proc", "/proc", "proc", 0, NULL)) {
2306 perror("get_pid1_time mount");
2307 return 0;
2308 }
2309 ret = lstat("/proc/1", &sb);
2310 if (ret) {
2311 perror("get_pid1_time lstat");
2312 return 0;
2313 }
2314 return time(NULL) - sb.st_ctime;
41bb9357 2315 }
ea56f722
SH
2316
2317 // give the child 1 second to be done forking and
2318 // write it's ack
2319 FD_ZERO(&s);
2320 FD_SET(cpipe[0], &s);
2321 tv.tv_sec = 1;
2322 tv.tv_usec = 0;
2323 ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
2324 if (ret <= 0)
2325 goto again;
2326 ret = read(cpipe[0], &v, 1);
2327 if (ret != sizeof(char) || v != '1') {
2328 goto again;
41bb9357 2329 }
ea56f722
SH
2330
2331 wait_for_pid(cpid);
67bd113f 2332 _exit(0);
ea56f722
SH
2333
2334again:
2335 kill(cpid, SIGKILL);
2336 wait_for_pid(cpid);
2337 goto loop;
41bb9357
SH
2338}
2339
/*
 * Obtain the value computed by get_pid1_time(qpid) from a forked child.
 *
 * The child writes the value to a pipe; the parent waits at most one
 * second for it.
 *
 * Returns the value read from the child, or 0 on any failure or timeout.
 */
static long int getreaperage(pid_t qpid)
{
	int pid, mypipe[2], ret;
	struct timeval tv;
	fd_set s;
	long int mtime, answer = 0;

	if (pipe(mypipe)) {
		return 0;
	}

	pid = fork();
	if (pid < 0) {
		/* no child to read from or wait for */
		perror("fork");
		close(mypipe[0]);
		close(mypipe[1]);
		return 0;
	}

	if (!pid) { // child
		mtime = get_pid1_time(qpid);
		if (write(mypipe[1], &mtime, sizeof(mtime)) != sizeof(mtime))
			fprintf(stderr, "Warning: bad write from getreaperage\n");
		_exit(0);
	}

	close(mypipe[1]);
	FD_ZERO(&s);
	FD_SET(mypipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(mypipe[0]+1, &s, NULL, NULL, &tv);
	if (ret < 0) {
		perror("select");
		goto out;
	}
	if (!ret) {
		/* a timeout does not set errno, so do not use perror() here */
		fprintf(stderr, "timed out\n");
		goto out;
	}
	if (read(mypipe[0], &mtime, sizeof(mtime)) != sizeof(mtime)) {
		perror("read");
		goto out;
	}
	answer = mtime;

out:
	wait_for_pid(pid);
	close(mypipe[0]);
	return answer;
}
2385
/*
 * Return the second field of /proc/uptime (the idle time), truncated to
 * whole units.
 *
 * Both fields of /proc/uptime carry a fractional part ("12345.67"), so
 * they have to be parsed as floating point: an integer conversion stops
 * at the '.' and the second field would never be matched.
 *
 * Returns 0 if the file cannot be opened or parsed.
 */
static long int getprocidle(void)
{
	FILE *f = fopen("/proc/uptime", "r");
	double idle = 0;
	int ret;

	if (!f)
		return 0;
	/* the first field (uptime) is not needed here, skip over it */
	ret = fscanf(f, "%*lf %lf", &idle);
	fclose(f);
	if (ret != 1 || idle < 0)
		return 0;
	return (long int)idle;
}
2399
2400/*
2401 * We read /proc/uptime and reuse its second field.
2402 * For the first field, we use the mtime for the reaper for
2403 * the calling pid as returned by getreaperage
2404 */
23ce2127
SH
2405static int proc_uptime_read(char *buf, size_t size, off_t offset,
2406 struct fuse_file_info *fi)
2407{
41bb9357 2408 struct fuse_context *fc = fuse_get_context();
97f1f27b 2409 struct file_info *d = (struct file_info *)fi->fh;
41bb9357
SH
2410 long int reaperage = getreaperage(fc->pid);;
2411 long int idletime = getprocidle();
97f1f27b 2412 size_t total_len = 0;
41bb9357 2413
97f1f27b
YY
2414 if (offset){
2415 if (offset > d->size)
2416 return -EINVAL;
2417 return 0;
2418 }
2419
2420 total_len = snprintf(buf, size, "%ld %ld\n", reaperage, idletime);
e1068397
MM
2421 if (total_len < 0){
2422 perror("Error writing to cache");
2423 return 0;
2424 }
cdcdb29b
MM
2425 if (total_len >= size){
2426 d->size = size;
2427 return size;
2428 }
2429
97f1f27b
YY
2430 d->size = total_len;
2431 return total_len;
23ce2127
SH
2432}
2433
49878439
YY
2434static int proc_diskstats_read(char *buf, size_t size, off_t offset,
2435 struct fuse_file_info *fi)
2436{
2437 char dev_name[72];
2438 struct fuse_context *fc = fuse_get_context();
97f1f27b 2439 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2440 char *cg;
2441 char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
49878439
YY
2442 *io_wait_time_str = NULL, *io_service_time_str = NULL;
2443 unsigned long read = 0, write = 0;
2444 unsigned long read_merged = 0, write_merged = 0;
2445 unsigned long read_sectors = 0, write_sectors = 0;
2446 unsigned long read_ticks = 0, write_ticks = 0;
2447 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
2448 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
2449 char *line = NULL;
e1068397 2450 size_t linelen = 0, total_len = 0, rv = 0;
49878439
YY
2451 unsigned int major = 0, minor = 0;
2452 int i = 0;
2c51f8dd 2453 FILE *f = NULL;
49878439 2454
97f1f27b
YY
2455 if (offset){
2456 if (offset > d->size)
2457 return -EINVAL;
2458 return 0;
2459 }
49878439 2460
2c51f8dd 2461 cg = get_pid_cgroup(fc->pid, "blkio");
49878439 2462 if (!cg)
53b43826 2463 return read_file("/proc/diskstats", buf, size, d);
49878439
YY
2464
2465 if (!cgm_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
2c51f8dd 2466 goto err;
49878439 2467 if (!cgm_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
2c51f8dd 2468 goto err;
49878439 2469 if (!cgm_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
2c51f8dd 2470 goto err;
49878439 2471 if (!cgm_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
2c51f8dd 2472 goto err;
49878439 2473 if (!cgm_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
2c51f8dd 2474 goto err;
49878439
YY
2475
2476
2477 f = fopen("/proc/diskstats", "r");
2478 if (!f)
2c51f8dd 2479 goto err;
49878439
YY
2480
2481 while (getline(&line, &linelen, f) != -1) {
2482 size_t l;
2483 char *printme, lbuf[256];
2484
c0adec85 2485 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
49878439
YY
2486 if(i == 3){
2487 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
2488 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
2489 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
2490 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
2491 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
2492 read_sectors = read_sectors/512;
2493 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
2494 write_sectors = write_sectors/512;
2f919d9d 2495
49878439
YY
2496 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
2497 rd_svctm = rd_svctm/1000000;
2498 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
2499 rd_wait = rd_wait/1000000;
2500 read_ticks = rd_svctm + rd_wait;
2501
2502 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
2503 wr_svctm = wr_svctm/1000000;
2504 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
2505 wr_wait = wr_wait/1000000;
2506 write_ticks = wr_svctm + wr_wait;
2507
2508 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
2509 tot_ticks = tot_ticks/1000000;
2510 }else{
2511 continue;
2512 }
2513
2514 memset(lbuf, 0, 256);
2515 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
2f919d9d 2516 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
49878439
YY
2517 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
2518 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
2519 printme = lbuf;
2520 } else
2521 continue;
2522
2523 l = snprintf(buf, size, "%s", printme);
e1068397
MM
2524 if (l < 0) {
2525 perror("Error writing to fuse buf");
2526 rv = 0;
2527 goto err;
2528 }
2529 if (l >= size) {
2530 fprintf(stderr, "Internal error: truncated write to cache\n");
2531 rv = 0;
2532 goto err;
2533 }
49878439
YY
2534 buf += l;
2535 size -= l;
2536 total_len += l;
2537 }
2538
97f1f27b 2539 d->size = total_len;
e1068397 2540 rv = total_len;
2c51f8dd
SH
2541err:
2542 free(cg);
2543 if (f)
2544 fclose(f);
49878439 2545 free(line);
2c51f8dd
SH
2546 free(io_serviced_str);
2547 free(io_merged_str);
2548 free(io_service_bytes_str);
2549 free(io_wait_time_str);
2550 free(io_service_time_str);
e1068397 2551 return rv;
49878439
YY
2552}
2553
23ce2127
SH
/*
 * Count how many bytes can currently be read from the file @which.
 *
 * Returns the byte count, or 0 if the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char chunk[4096];
	ssize_t total = 0;
	size_t got;
	FILE *fp;

	fp = fopen(which, "r");
	if (fp == NULL)
		return 0;

	/* consume the stream to its end, adding up what was delivered */
	do {
		got = fread(chunk, 1, sizeof(chunk), fp);
		total += got;
	} while (got > 0);

	fclose(fp);
	return total;
}
2570
758ad80c
SH
/*
 * getattr for the /proc part of the tree.
 *
 * "/proc" is reported as a read-only directory, the five emulated files
 * as read-only regular files of size 0.  Everything is owned by uid/gid
 * 0 and all timestamps are set to the current time.
 *
 * Returns 0 on success, -EINVAL if the clock cannot be read and -ENOENT
 * for any other path.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const emulated[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	struct timespec now;
	size_t idx;

	memset(sb, 0, sizeof(*sb));
	if (clock_gettime(CLOCK_REALTIME, &now) < 0)
		return -EINVAL;

	sb->st_uid = 0;
	sb->st_gid = 0;
	sb->st_ctim = now;
	sb->st_mtim = now;
	sb->st_atim = now;

	if (!strcmp(path, "/proc")) {
		sb->st_mode = S_IFDIR | 00555;
		sb->st_nlink = 2;
		return 0;
	}

	for (idx = 0; idx < sizeof(emulated) / sizeof(emulated[0]); idx++) {
		if (strcmp(path, emulated[idx]) != 0)
			continue;
		sb->st_size = 0;
		sb->st_mode = S_IFREG | 00444;
		sb->st_nlink = 1;
		return 0;
	}

	return -ENOENT;
}
2598
2599static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2600 struct fuse_file_info *fi)
2601{
2602 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2603 filler(buf, "meminfo", NULL, 0) != 0 ||
2604 filler(buf, "stat", NULL, 0) != 0 ||
49878439
YY
2605 filler(buf, "uptime", NULL, 0) != 0 ||
2606 filler(buf, "diskstats", NULL, 0) != 0)
758ad80c 2607 return -EINVAL;
758ad80c
SH
2608 return 0;
2609}
2610
35629743
SH
2611static int proc_open(const char *path, struct fuse_file_info *fi)
2612{
96fc5ee6
SH
2613 int type = -1;
2614 struct file_info *info;
2615
2616 if (strcmp(path, "/proc/meminfo") == 0)
2617 type = LXC_TYPE_PROC_MEMINFO;
2618 else if (strcmp(path, "/proc/cpuinfo") == 0)
2619 type = LXC_TYPE_PROC_CPUINFO;
2620 else if (strcmp(path, "/proc/uptime") == 0)
2621 type = LXC_TYPE_PROC_UPTIME;
2622 else if (strcmp(path, "/proc/stat") == 0)
2623 type = LXC_TYPE_PROC_STAT;
2624 else if (strcmp(path, "/proc/diskstats") == 0)
2625 type = LXC_TYPE_PROC_DISKSTATS;
2626 if (type == -1)
2627 return -ENOENT;
2628
2c51f8dd
SH
2629 info = malloc(sizeof(*info));
2630 if (!info)
2631 return -ENOMEM;
2632
96fc5ee6
SH
2633 memset(info, 0, sizeof(*info));
2634 info->type = type;
2635
97f1f27b 2636 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
2c51f8dd
SH
2637 do {
2638 info->buf = malloc(info->buflen);
2639 } while (!info->buf);
97f1f27b
YY
2640 memset(info->buf, 0, info->buflen);
2641 /* set actual size to buffer size */
2f919d9d 2642 info->size = info->buflen;
97f1f27b 2643
96fc5ee6
SH
2644 fi->fh = (unsigned long)info;
2645 return 0;
2646}
2647
2648static int proc_release(const char *path, struct fuse_file_info *fi)
2649{
2650 struct file_info *f = (struct file_info *)fi->fh;
2651
2652 do_release_file_info(f);
2653 return 0;
35629743
SH
2654}
2655
35629743
SH
2656static int proc_read(const char *path, char *buf, size_t size, off_t offset,
2657 struct fuse_file_info *fi)
2658{
96fc5ee6
SH
2659 struct file_info *f = (struct file_info *) fi->fh;
2660
2661 switch (f->type) {
2f919d9d 2662 case LXC_TYPE_PROC_MEMINFO:
23ce2127 2663 return proc_meminfo_read(buf, size, offset, fi);
96fc5ee6 2664 case LXC_TYPE_PROC_CPUINFO:
23ce2127 2665 return proc_cpuinfo_read(buf, size, offset, fi);
96fc5ee6 2666 case LXC_TYPE_PROC_UPTIME:
23ce2127 2667 return proc_uptime_read(buf, size, offset, fi);
96fc5ee6 2668 case LXC_TYPE_PROC_STAT:
23ce2127 2669 return proc_stat_read(buf, size, offset, fi);
96fc5ee6 2670 case LXC_TYPE_PROC_DISKSTATS:
49878439 2671 return proc_diskstats_read(buf, size, offset, fi);
96fc5ee6
SH
2672 default:
2673 return -EINVAL;
2674 }
35629743
SH
2675}
2676
2ad6d2bd
SH
2677/*
2678 * FUSE ops for /
2679 * these just delegate to the /proc and /cgroup ops as
2680 * needed
2681 */
758ad80c
SH
2682
/*
 * getattr for the whole filesystem.
 *
 * "/" is answered directly as a directory with mode 0755; paths starting
 * with "/cgroup" or "/proc" are delegated to cg_getattr() and
 * proc_getattr().
 *
 * Returns 0 on success or the delegate's result, and -ENOENT for a path
 * outside those trees, matching what lxcfs_opendir() and proc_getattr()
 * report for names that do not exist.
 */
static int lxcfs_getattr(const char *path, struct stat *sb)
{
	if (strcmp(path, "/") == 0) {
		sb->st_mode = S_IFDIR | 00755;
		sb->st_nlink = 2;
		return 0;
	}
	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_getattr(path, sb);
	if (strncmp(path, "/proc", 5) == 0)
		return proc_getattr(path, sb);

	/* nothing else exists at the top level */
	return -ENOENT;
}
2698
/*
 * opendir: "/" and "/proc" need no per-open state, anything starting
 * with "/cgroup" is handled by cg_opendir().
 *
 * Returns 0, the result of cg_opendir(), or -ENOENT for other paths.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	bool stateless_dir = !strcmp(path, "/") || !strcmp(path, "/proc");

	if (stateless_dir)
		return 0;
	if (!strncmp(path, "/cgroup", 7))
		return cg_opendir(path, fi);

	return -ENOENT;
}
2711
2712static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2713 struct fuse_file_info *fi)
2714{
2715 if (strcmp(path, "/") == 0) {
2716 if (filler(buf, "proc", NULL, 0) != 0 ||
2717 filler(buf, "cgroup", NULL, 0) != 0)
2718 return -EINVAL;
2719 return 0;
2720 }
35629743 2721 if (strncmp(path, "/cgroup", 7) == 0)
758ad80c 2722 return cg_readdir(path, buf, filler, offset, fi);
35629743
SH
2723 if (strcmp(path, "/proc") == 0)
2724 return proc_readdir(path, buf, filler, offset, fi);
758ad80c
SH
2725 return -EINVAL;
2726}
2727
/*
 * releasedir: counterpart of lxcfs_opendir().  "/" and "/proc" have
 * nothing to release, the cgroup tree is handled by cg_releasedir().
 *
 * Returns 0, the result of cg_releasedir(), or -EINVAL for other paths.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	bool stateless_dir = !strcmp(path, "/") || !strcmp(path, "/proc");

	if (stateless_dir)
		return 0;
	if (!strncmp(path, "/cgroup", 7))
		return cg_releasedir(path, fi);

	return -EINVAL;
}
2739
99978832
SH
/*
 * open: route by path prefix to cg_open() or proc_open().
 *
 * Returns the delegate's result, or -EINVAL for a path in neither tree.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	const bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (in_cgroup)
		return cg_open(path, fi);
	if (!strncmp(path, "/proc", 5))
		return proc_open(path, fi);

	return -EINVAL;
}
2749
/*
 * read: route by path prefix to cg_read() or proc_read().
 *
 * Returns the delegate's result, or -EINVAL for a path in neither tree.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	const bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (in_cgroup)
		return cg_read(path, buf, size, offset, fi);
	if (!strncmp(path, "/proc", 5))
		return proc_read(path, buf, size, offset, fi);

	return -EINVAL;
}
2760
2ad6d2bd
SH
/*
 * write: only paths starting with "/cgroup" are forwarded, to
 * cg_write(); every other path is rejected with -EINVAL.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
2770
99978832
SH
/*
 * flush: no work is done here, success is always reported.
 */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;
	return 0;
}
2775
/*
 * release: route by path prefix to cg_release() or proc_release().
 *
 * Returns the delegate's result, or -EINVAL for a path in neither tree.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	const bool in_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (in_cgroup)
		return cg_release(path, fi);
	if (!strncmp(path, "/proc", 5))
		return proc_release(path, fi);

	return -EINVAL;
}
2785
/*
 * fsync: no work is done here, success is always reported.
 */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;
	return 0;
}
2790
ab54b798
SH
/*
 * mkdir: only supported below "/cgroup", where cg_mkdir() does the
 * work; every other path is rejected with -EINVAL.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
2798
341b21ad
SH
/*
 * chown: only supported below "/cgroup", where cg_chown() does the
 * work; every other path is rejected with -EINVAL.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
2806
2ad6d2bd
SH
/*
 * Tools such as cat truncate a file before they write to it.  That has
 * no useful meaning for cgroup files, so for paths starting with
 * "/cgroup" the request is accepted and ignored.  Any other path is
 * rejected with -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	(void)newsize;

	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
2818
50d8d5b5
SH
/*
 * rmdir: only supported below "/cgroup", where cg_rmdir() does the
 * work; every other path is rejected with -EINVAL.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
2825
fd2e4e03
SH
/*
 * chmod: only supported below "/cgroup", where cg_chmod() does the
 * work; every other path is rejected with -EINVAL.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
2832
758ad80c
SH
2833const struct fuse_operations lxcfs_ops = {
2834 .getattr = lxcfs_getattr,
2835 .readlink = NULL,
2836 .getdir = NULL,
2837 .mknod = NULL,
ab54b798 2838 .mkdir = lxcfs_mkdir,
758ad80c 2839 .unlink = NULL,
50d8d5b5 2840 .rmdir = lxcfs_rmdir,
758ad80c
SH
2841 .symlink = NULL,
2842 .rename = NULL,
2843 .link = NULL,
fd2e4e03 2844 .chmod = lxcfs_chmod,
341b21ad 2845 .chown = lxcfs_chown,
2ad6d2bd 2846 .truncate = lxcfs_truncate,
758ad80c 2847 .utime = NULL,
99978832
SH
2848
2849 .open = lxcfs_open,
2850 .read = lxcfs_read,
2851 .release = lxcfs_release,
2ad6d2bd 2852 .write = lxcfs_write,
99978832 2853
758ad80c 2854 .statfs = NULL,
99978832
SH
2855 .flush = lxcfs_flush,
2856 .fsync = lxcfs_fsync,
758ad80c
SH
2857
2858 .setxattr = NULL,
2859 .getxattr = NULL,
2860 .listxattr = NULL,
2861 .removexattr = NULL,
2862
2863 .opendir = lxcfs_opendir,
2864 .readdir = lxcfs_readdir,
2865 .releasedir = lxcfs_releasedir,
2866
2867 .fsyncdir = NULL,
2868 .init = NULL,
2869 .destroy = NULL,
2870 .access = NULL,
2871 .create = NULL,
2872 .ftruncate = NULL,
2873 .fgetattr = NULL,
2874};
2875
/*
 * Print the command line synopsis for program name @me to stderr and
 * terminate the process with exit status 1.
 */
static void usage(const char *me)
{
	fprintf(stderr,
		"Usage:\n"
		"\n"
		"%s mountpoint\n"
		"%s -h\n",
		me, me);
	exit(1);
}
2884
/*
 * Return true if @w is one of the accepted spellings of the help
 * request: "-h", "--help", "-help" or "help".
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = {
		"-h", "--help", "-help", "help",
	};
	size_t n;

	for (n = 0; n < sizeof(spellings) / sizeof(spellings[0]); n++) {
		if (!strcmp(w, spellings[n]))
			return true;
	}
	return false;
}
2894
0b0f73db
SH
/*
 * Remove the first occurrence of the argument @which from @argv
 * (argv[0] is never considered), moving the following entries and the
 * terminating NULL down by one and decrementing *@argcp.  If @which is
 * not present nothing is changed.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	char **slot = argv + 1;

	/* find the first matching argument, if there is one */
	while (*slot && strcmp(*slot, which) != 0)
		slot++;
	if (!*slot)
		return;

	/* close the gap; the NULL terminator moves down as well */
	for (; *slot; slot++)
		slot[0] = slot[1];
	(*argcp)--;
}
2909
/*
 * Remove the first "@opt @v" pair from @argv (argv[0] is never
 * considered), moving the following entries and the terminating NULL
 * down by two and reducing *@argcp by two.
 *
 * If @opt is found but is followed by a value other than @v, the value
 * that was actually found is reported on stderr and the process exits
 * with status 1.  If @opt is not present, or is the last argument,
 * nothing is changed.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	/* an option needs a value after it, so stop one short of the end */
	for (i = 1; argv[i] && argv[i+1]; i++) {
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i+1], v) != 0) {
			/* name the offending value, not the one we expected */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i+1]);
			exit(1);
		}
		/* argv[i+1] != NULL guarantees argv[i+2] is within the array */
		for (; argv[i+1]; i++)
			argv[i] = argv[i+2];
		(*argcp) -= 2;
		return;
	}
}
2930
758ad80c
SH
2931int main(int argc, char *argv[])
2932{
c0adec85 2933 int ret = -1;
e5d26e0b 2934 struct lxcfs_state *d = NULL;
0b0f73db
SH
2935 /*
2936 * what we pass to fuse_main is:
2937 * argv[0] -s -f -o allow_other,directio argv[1] NULL
2938 */
2c51f8dd
SH
2939 int nargs = 5, cnt = 0;
2940 char *newargv[6];
758ad80c 2941
0b0f73db
SH
2942 /* accomodate older init scripts */
2943 swallow_arg(&argc, argv, "-s");
2944 swallow_arg(&argc, argv, "-f");
2945 swallow_option(&argc, argv, "-o", "allow_other");
2946
2e9c0b32
SH
2947 if (argc == 2 && strcmp(argv[1], "--version") == 0) {
2948 fprintf(stderr, "%s\n", VERSION);
2949 exit(0);
2950 }
0b0f73db 2951 if (argc != 2 || is_help(argv[1]))
758ad80c
SH
2952 usage(argv[0]);
2953
2c51f8dd
SH
2954 do {
2955 d = malloc(sizeof(*d));
2956 } while (!d);
0b0f73db 2957
38a76a91 2958 newargv[cnt++] = argv[0];
38a76a91
SH
2959 newargv[cnt++] = "-f";
2960 newargv[cnt++] = "-o";
2961 newargv[cnt++] = "allow_other,direct_io";
2962 newargv[cnt++] = argv[1];
2963 newargv[cnt++] = NULL;
758ad80c
SH
2964
2965 if (!cgm_escape_cgroup())
2966 fprintf(stderr, "WARNING: failed to escape to root cgroup\n");
2967
2968 if (!cgm_get_controllers(&d->subsystems))
c0adec85 2969 goto out;
758ad80c 2970
38a76a91 2971 ret = fuse_main(nargs, newargv, &lxcfs_ops, d);
2c51f8dd 2972 cgm_dbus_disconnect();
758ad80c 2973
c0adec85 2974out:
e5d26e0b 2975 free(d);
758ad80c 2976 return ret;
2183082c 2977}