]> git.proxmox.com Git - mirror_lxcfs.git/blame - lxcfs.c
Merge pull request #48 from bmiklautz/uptime_format
[mirror_lxcfs.git] / lxcfs.c
CommitLineData
758ad80c
SH
1/* lxcfs
2 *
2c51f8dd 3 * Copyright © 2014,2015 Canonical, Inc
758ad80c
SH
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
f2799430 6 * See COPYING file for details.
758ad80c
SH
7 */
8
758ad80c
SH
9#define FUSE_USE_VERSION 26
10
2183082c 11#include <stdio.h>
758ad80c
SH
12#include <dirent.h>
13#include <fcntl.h>
14#include <fuse.h>
15#include <unistd.h>
16#include <errno.h>
17#include <stdbool.h>
18#include <time.h>
19#include <string.h>
20#include <stdlib.h>
21#include <libgen.h>
41bb9357
SH
22#include <sched.h>
23#include <linux/sched.h>
a05660a6 24#include <sys/socket.h>
41bb9357
SH
25#include <sys/mount.h>
26#include <wait.h>
758ad80c 27
758ad80c 28#include "cgmanager.h"
2e9c0b32 29#include "config.h" // for VERSION
758ad80c
SH
30
/*
 * lxcfs state stored as the private_data of the FUSE context
 * (see LXCFS_DATA below).
 */
struct lxcfs_state {
	/*
	 * A NULL-terminated list of the mounted subsystems. We
	 * detect this at startup.
	 */
	char **subsystems;
};
/* Fetch the struct lxcfs_state attached to the current FUSE context. */
#define LXCFS_DATA ((struct lxcfs_state *) fuse_get_context()->private_data)
39
443d13f5
SH
/*
 * Kind of object a struct file_info handle refers to; stored in
 * file_info.type and checked by the read/readdir handlers.
 */
enum {
	LXC_TYPE_CGDIR,           /* cgroup directory handle */
	LXC_TYPE_CGFILE,          /* cgroup file handle */
	LXC_TYPE_PROC_MEMINFO,
	LXC_TYPE_PROC_CPUINFO,
	LXC_TYPE_PROC_UPTIME,
	LXC_TYPE_PROC_STAT,
	LXC_TYPE_PROC_DISKSTATS,
};
49
c688e1b3
SH
/*
 * Per-open-handle bookkeeping, stored in fuse_file_info->fh by the
 * open/opendir handlers and freed by do_release_file_info().
 */
struct file_info {
	char *controller;	/* heap copy of the controller name, may be NULL */
	char *cgroup;		/* heap copy of the cgroup path, may be NULL */
	char *file;		/* heap copy of the file (key) name, may be NULL */
	int type;		/* one of the LXC_TYPE_* values */
	char *buf;  // unused as of yet
	int buflen;
	int size; //actual data size
};
59
97f1f27b
YY
/* reserve buffer size, for cpuall in /proc/stat */
#define BUF_RESERVE_SIZE 256

/*
 * Append "<pid>\n" to the heap string *src, growing it when needed.
 *
 * src: pointer to the char* to append to. If *src is NULL a fresh buffer
 *      of BUF_RESERVE_SIZE bytes is allocated (the caller is expected to
 *      pass *sz == 0 in that case).
 * sz:  number of characters stored so far, not counting the trailing \0;
 *      updated on return.
 * asz: number of bytes allocated so far; updated on return.
 * pid: the pid to append.
 *
 * Allocation failures are retried until they succeed, so this never
 * returns without having appended the pid.
 */
static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
{
	char *d = *src;
	char tmp[30];
	size_t tmplen;

	sprintf(tmp, "%d\n", (int)pid);
	tmplen = strlen(tmp);

	if (!d) {
		do {
			d = malloc(BUF_RESERVE_SIZE);
		} while (!d);
		*src = d;
		*asz = BUF_RESERVE_SIZE;
	} else if (tmplen + *sz + 1 >= *asz) {
		/*
		 * Compare the sizes, not the addresses of the counters.
		 * Use a separate pointer for realloc so that a failed
		 * attempt does not lose the data already collected.
		 */
		char *grown;

		do {
			grown = realloc(d, *asz + BUF_RESERVE_SIZE);
		} while (!grown);
		d = grown;
		*src = d;
		*asz += BUF_RESERVE_SIZE;
	}
	memcpy(d + *sz, tmp, tmplen);
	*sz += tmplen;
	d[*sz] = '\0';
}
94
/*
 * Return a heap copy of str, retrying strdup() until it succeeds.
 * A NULL str yields NULL. The parent argument is not used.
 * The caller owns the returned string.
 */
static char *must_copy_string(void *parent, const char *str)
{
	char *copy;

	if (str == NULL)
		return NULL;

	for (;;) {
		copy = strdup(str);
		if (copy)
			return copy;
	}
}
106
a05660a6
SH
/*
 * Wait for child pid to terminate, restarting waitpid() on EINTR.
 * Returns 0 only if the child exited normally with status 0,
 * -1 on any waitpid() error or any other termination.
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;
	return -1;
}
124
053a659d
SH
/*
 * Given an open FILE * for /proc/pid/{u,g}id_map and an id valid in the
 * caller's namespace, return that id as mapped into pid's namespace.
 *
 * The file is rewound and scanned line by line; lines that do not parse
 * as three unsigned integers are skipped.
 * Returns the mapped id, or -1 (as unsigned int) when no range contains
 * in_id or when a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char line[400];

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, sizeof(line), idfile) != NULL) {
		unsigned int ns_base,	// base id of the range in the idfile's namespace
			     host_base,	// base id of the range in the caller's namespace
			     range;	// number of ids in this range

		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range) != 3)
			continue;

		if (host_base + range < host_base || ns_base + range < ns_base) {
			/*
			 * The range wrapped around - unexpected as this is a
			 * procfile, so just bail.
			 */
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				ns_base, host_base, range, line);
			return -1;
		}

		if (in_id < host_base || in_id >= host_base + range)
			continue;

		/*
		 * host_base <= in_id < host_base + range, and neither
		 * range wraps, so ns_base + (in_id - host_base) cannot
		 * wrap either.
		 */
		return ns_base + (in_id - host_base);
	}

	// no answer found
	return -1;
}
168
341b21ad
SH
/*
 * for is_privileged_over,
 * specify whether we require the calling uid to be root in his
 * namespace
 */
#define NS_ROOT_REQD true
#define NS_ROOT_OPT false

/* size of the buffers used to build /proc/<pid>/... paths */
#define PROCLEN 100

/*
 * Decide whether uid (a task with the given pid) is privileged over victim.
 *
 * Returns false if either id is -1. When req_ns_root is false, identical
 * ids suffice (i.e. uid 1000 has write access to files owned by uid 1000).
 * Otherwise /proc/<pid>/uid_map is consulted: uid must map to 0 and
 * victim must map to something in that file.
 */
static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
{
	char map_path[PROCLEN];
	FILE *map;
	bool allowed;
	int n;

	if (uid == -1 || victim == -1)
		return false;

	if (uid == victim && !req_ns_root)
		return true;

	n = snprintf(map_path, PROCLEN, "/proc/%d/uid_map", pid);
	if (n < 0 || n >= PROCLEN)
		return false;

	map = fopen(map_path, "r");
	if (!map)
		return false;

	/*
	 * Reject unless the caller is root in his namespace, and unless
	 * victim is mapped into the caller's ns.
	 * XXX I'm not sure the second check is needed given that fuse
	 * will be sending requests where the vfs has converted
	 */
	allowed = convert_id_to_ns(map, uid) == 0 &&
		  convert_id_to_ns(map, victim) != -1;

	fclose(map);
	return allowed;
}
224
/*
 * Check whether the "other" permission bits of fmode satisfy the access
 * mode (O_RDONLY / O_WRONLY / O_RDWR) in req_mode. Callers shift fmode
 * so that the class they care about sits in the "other" position.
 * Any other access mode yields false.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
244
3db25a35
SH
/*
 * Return a heap copy of the first path component of taskcg that lies
 * below querycg, e.g. ("a/b/c", "a") -> "b" and ("/a/b", "/") -> "a".
 * Returns NULL if taskcg is not longer than querycg or strdup() fails.
 * The caller must free the result.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t skip;
	char *next, *slash;

	if (strlen(taskcg) <= strlen(querycg)) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* skip the query prefix plus the separating '/' */
	skip = strcmp(querycg, "/") == 0 ? 1 : strlen(querycg) + 1;

	next = strdup(taskcg + skip);
	if (!next)
		return NULL;

	slash = strchr(next, '/');
	if (slash)
		*slash = '\0';
	return next;
}
265
2c51f8dd
SH
/* Remove a single trailing '\n' from x, if present. */
static void stripnewline(char *x)
{
	size_t len = strlen(x);

	if (len == 0)
		return;
	if (x[len - 1] == '\n')
		x[len - 1] = '\0';
}
272
273static char *get_pid_cgroup(pid_t pid, const char *contrl)
274{
275 char fnam[PROCLEN];
276 FILE *f;
277 char *answer = NULL;
278 char *line = NULL;
279 size_t len = 0;
280 int ret;
281
282 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
283 if (ret < 0 || ret >= PROCLEN)
284 return NULL;
285 if (!(f = fopen(fnam, "r")))
286 return NULL;
287
288 while (getline(&line, &len, f) != -1) {
289 char *c1, *c2;
290 if (!line[0])
291 continue;
292 c1 = strchr(line, ':');
293 if (!c1)
294 goto out;
295 c1++;
296 c2 = strchr(c1, ':');
297 if (!c2)
298 goto out;
299 *c2 = '\0';
300 if (strcmp(c1, contrl) != 0)
301 continue;
302 c2++;
303 stripnewline(c2);
304 do {
305 answer = strdup(c2);
306 } while (!answer);
307 break;
308 }
309
310out:
311 fclose(f);
312 free(line);
313 return answer;
314}
315
758ad80c
SH
316/*
317 * check whether a fuse context may access a cgroup dir or file
318 *
319 * If file is not null, it is a cgroup file to check under cg.
320 * If file is null, then we are checking perms on cg itself.
321 *
322 * For files we can check the mode of the list_keys result.
323 * For cgroups, we must make assumptions based on the files under the
324 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
325 * yet.
326 */
327static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
328{
2c51f8dd
SH
329 struct cgm_keys **list = NULL;
330 bool ret = false;
758ad80c
SH
331 int i;
332
333 if (!file)
334 file = "tasks";
335
336 if (*file == '/')
337 file++;
338
339 if (!cgm_list_keys(contrl, cg, &list))
340 return false;
341 for (i = 0; list[i]; i++) {
342 if (strcmp(list[i]->name, file) == 0) {
343 struct cgm_keys *k = list[i];
341b21ad 344 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
2c51f8dd
SH
345 if (perms_include(k->mode >> 6, mode)) {
346 ret = true;
347 goto out;
348 }
758ad80c
SH
349 }
350 if (fc->gid == k->gid) {
2c51f8dd
SH
351 if (perms_include(k->mode >> 3, mode)) {
352 ret = true;
353 goto out;
354 }
758ad80c 355 }
2c51f8dd
SH
356 ret = perms_include(k->mode, mode);
357 goto out;
758ad80c
SH
358 }
359 }
360
2c51f8dd
SH
361out:
362 free_keys(list);
363 return ret;
3db25a35
SH
364}
365
04b5cbdc
SH
#define INITSCOPE "/init.scope"
/*
 * Strip a trailing "/init.scope" component from cg, in place.
 * If cg is exactly "/init.scope" it becomes "/"; otherwise the suffix is
 * cut off entirely. Strings that do not end in INITSCOPE are untouched.
 */
static void prune_init_slice(char *cg)
{
	size_t cg_len = strlen(cg);
	size_t scope_len = strlen(INITSCOPE);
	char *point;

	/*
	 * Compare lengths before doing pointer arithmetic: forming a
	 * pointer before the start of cg would be undefined behaviour.
	 */
	if (cg_len < scope_len)
		return;

	point = cg + (cg_len - scope_len);
	if (strcmp(point, INITSCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';
	else
		*point = '\0';
}
380
3db25a35
SH
381/*
382 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
383 * If caller is in /a, he may act on /a/b, but not on /b.
384 * if the answer is false and nextcg is not NULL, then *nextcg will point
2c51f8dd
SH
385 * to a string containing the next cgroup directory under cg, which must be
386 * freed by the caller.
3db25a35
SH
387 */
388static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
389{
2c51f8dd 390 char fnam[PROCLEN];
3db25a35
SH
391 FILE *f;
392 bool answer = false;
393 char *line = NULL;
394 size_t len = 0;
2c51f8dd 395 int ret;
3db25a35 396
2c51f8dd
SH
397 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
398 if (ret < 0 || ret >= PROCLEN)
399 return false;
3db25a35
SH
400 if (!(f = fopen(fnam, "r")))
401 return false;
402
403 while (getline(&line, &len, f) != -1) {
404 char *c1, *c2, *linecmp;
405 if (!line[0])
406 continue;
407 c1 = strchr(line, ':');
408 if (!c1)
409 goto out;
410 c1++;
411 c2 = strchr(c1, ':');
412 if (!c2)
413 goto out;
414 *c2 = '\0';
415 if (strcmp(c1, contrl) != 0)
416 continue;
417 c2++;
418 stripnewline(c2);
04b5cbdc 419 prune_init_slice(c2);
3db25a35
SH
420 /*
421 * callers pass in '/' for root cgroup, otherwise they pass
422 * in a cgroup without leading '/'
423 */
424 linecmp = *cg == '/' ? c2 : c2+1;
425 if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
426 if (nextcg)
427 *nextcg = get_next_cgroup_dir(linecmp, cg);
428 goto out;
429 }
430 answer = true;
431 goto out;
432 }
433
434out:
435 fclose(f);
436 free(line);
437 return answer;
438}
439
758ad80c 440/*
2c51f8dd
SH
441 * given /cgroup/freezer/a/b, return "freezer".
442 * the returned char* should NOT be freed.
758ad80c
SH
443 */
444static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
445{
446 const char *p1;
2c51f8dd 447 char *contr, *slash;
758ad80c
SH
448
449 if (strlen(path) < 9)
450 return NULL;
ac5d9d48
SH
451 if (*(path+7) != '/')
452 return NULL;
758ad80c 453 p1 = path+8;
2c51f8dd
SH
454 contr = strdupa(p1);
455 if (!contr)
456 return NULL;
457 slash = strstr(contr, "/");
758ad80c
SH
458 if (slash)
459 *slash = '\0';
460
461 /* verify that it is a subsystem */
462 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
463 int i;
2c51f8dd 464 if (!list)
758ad80c 465 return NULL;
758ad80c 466 for (i = 0; list[i]; i++) {
2c51f8dd
SH
467 if (strcmp(list[i], contr) == 0)
468 return list[i];
758ad80c 469 }
758ad80c
SH
470 return NULL;
471}
472
/*
 * Find the start of the cgroup in /cgroup/controller/the/cgroup/path,
 * i.e. the text after the first '/' found at or beyond offset 8.
 * Note that the returned value may include files (keynames) etc.
 * Returns NULL if the path is shorter than 9 characters or has no such
 * '/'. The result points into path.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *slash;

	if (strlen(path) < 9)
		return NULL;

	slash = strchr(path + 8, '/');
	return slash ? slash + 1 : NULL;
}
488
/*
 * Return true if f (a leading '/' is ignored) is one of the child
 * cgroups that cgm_list_children() reports for dir under contr.
 * Returns false when f is NULL or the children cannot be listed.
 */
static bool is_child_cgroup(const char *contr, const char *dir, const char *f)
{
	char **children;
	bool found = false;
	int i;

	if (!f)
		return false;
	if (*f == '/')
		f++;

	if (!cgm_list_children(contr, dir, &children))
		return false;

	for (i = 0; children[i] && !found; i++)
		found = strcmp(children[i], f) == 0;

	/* the list and every entry in it belong to us */
	for (i = 0; children[i]; i++)
		free(children[i]);
	free(children);

	return found;
}
515
516static struct cgm_keys *get_cgroup_key(const char *contr, const char *dir, const char *f)
517{
2c51f8dd
SH
518 struct cgm_keys **list = NULL;
519 struct cgm_keys *k = NULL;
758ad80c
SH
520 int i;
521
522 if (!f)
523 return NULL;
524 if (*f == '/')
525 f++;
526 if (!cgm_list_keys(contr, dir, &list))
527 return NULL;
528 for (i = 0; list[i]; i++) {
529 if (strcmp(list[i]->name, f) == 0) {
2c51f8dd
SH
530 int j;
531 // free all the keys we are not returning
532 k = list[i];
533 for (j = 0; list[j]; j++) {
534 if (i != j)
103f104c 535 free_key(list[j]);
2c51f8dd
SH
536 }
537 free(list);
758ad80c
SH
538 return k;
539 }
540 }
541
2c51f8dd 542 free_keys(list);
758ad80c
SH
543 return NULL;
544}
545
2c51f8dd
SH
/*
 * Split cg at its last '/'.
 *
 * *dir receives a heap copy of cg truncated at the last '/', or of all
 * of cg if it contains none; it should be freed by the caller.
 * *file points at the last '/' inside cg itself (so it includes the
 * slash), or is NULL if cg has no '/'; it must not be freed.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **file)
{
	char *copy;

	do {
		copy = strdup(cg);
	} while (!copy);
	*dir = copy;

	*file = strrchr(cg, '/');
	if (*file == NULL)
		return;

	/* the copy has its last '/' at the same offset; cut it there */
	*strrchr(copy, '/') = '\0';
}
564
565/*
2ad6d2bd 566 * FUSE ops for /cgroup
758ad80c 567 */
2ad6d2bd 568
758ad80c
SH
569static int cg_getattr(const char *path, struct stat *sb)
570{
571 struct timespec now;
572 struct fuse_context *fc = fuse_get_context();
2c51f8dd 573 char * cgdir = NULL;
758ad80c 574 char *fpath = NULL, *path1, *path2;
2c51f8dd 575 struct cgm_keys *k = NULL;
758ad80c 576 const char *cgroup;
2c51f8dd
SH
577 const char *controller = NULL;
578 int ret = -ENOENT;
758ad80c
SH
579
580
581 if (!fc)
582 return -EIO;
583
584 memset(sb, 0, sizeof(struct stat));
585
586 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
587 return -EINVAL;
588
589 sb->st_uid = sb->st_gid = 0;
590 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
591 sb->st_size = 0;
592
593 if (strcmp(path, "/cgroup") == 0) {
594 sb->st_mode = S_IFDIR | 00755;
595 sb->st_nlink = 2;
596 return 0;
597 }
598
599 controller = pick_controller_from_path(fc, path);
600 if (!controller)
601 return -EIO;
758ad80c
SH
602 cgroup = find_cgroup_in_path(path);
603 if (!cgroup) {
604 /* this is just /cgroup/controller, return it as a dir */
605 sb->st_mode = S_IFDIR | 00755;
606 sb->st_nlink = 2;
607 return 0;
608 }
341b21ad 609
758ad80c
SH
610 get_cgdir_and_path(cgroup, &cgdir, &fpath);
611
612 if (!fpath) {
613 path1 = "/";
614 path2 = cgdir;
615 } else {
616 path1 = cgdir;
617 path2 = fpath;
618 }
619
758ad80c
SH
620 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
621 * Then check that caller's cgroup is under path if fpath is a child
622 * cgroup, or cgdir if fpath is a file */
623
624 if (is_child_cgroup(controller, path1, path2)) {
f9a05025
SH
625 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
626 /* this is just /cgroup/controller, return it as a dir */
627 sb->st_mode = S_IFDIR | 00555;
628 sb->st_nlink = 2;
2c51f8dd
SH
629 ret = 0;
630 goto out;
631 }
632 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
633 ret = -EACCES;
634 goto out;
f9a05025 635 }
758ad80c 636
053a659d
SH
637 // get uid, gid, from '/tasks' file and make up a mode
638 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
639 sb->st_mode = S_IFDIR | 00755;
640 k = get_cgroup_key(controller, cgroup, "tasks");
641 if (!k) {
053a659d
SH
642 sb->st_uid = sb->st_gid = 0;
643 } else {
053a659d
SH
644 sb->st_uid = k->uid;
645 sb->st_gid = k->gid;
646 }
2c51f8dd 647 free_key(k);
758ad80c 648 sb->st_nlink = 2;
2c51f8dd
SH
649 ret = 0;
650 goto out;
758ad80c
SH
651 }
652
653 if ((k = get_cgroup_key(controller, path1, path2)) != NULL) {
758ad80c 654 sb->st_mode = S_IFREG | k->mode;
053a659d 655 sb->st_nlink = 1;
758ad80c
SH
656 sb->st_uid = k->uid;
657 sb->st_gid = k->gid;
7253e0a4 658 sb->st_size = 0;
2c51f8dd 659 free_key(k);
adc3867b
SH
660 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
661 ret = -ENOENT;
662 goto out;
663 }
664 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) {
665 ret = -EACCES;
666 goto out;
667 }
2c51f8dd
SH
668
669 ret = 0;
758ad80c
SH
670 }
671
2c51f8dd
SH
672out:
673 free(cgdir);
674 return ret;
758ad80c 675}
2183082c 676
758ad80c 677static int cg_opendir(const char *path, struct fuse_file_info *fi)
2183082c 678{
7f163b71 679 struct fuse_context *fc = fuse_get_context();
7f163b71 680 const char *cgroup;
c688e1b3 681 struct file_info *dir_info;
2c51f8dd 682 char *controller = NULL;
7f163b71
SH
683
684 if (!fc)
685 return -EIO;
686
c688e1b3
SH
687 if (strcmp(path, "/cgroup") == 0) {
688 cgroup = NULL;
689 controller = NULL;
690 } else {
691 // return list of keys for the controller, and list of child cgroups
692 controller = pick_controller_from_path(fc, path);
693 if (!controller)
694 return -EIO;
7f163b71 695
c688e1b3
SH
696 cgroup = find_cgroup_in_path(path);
697 if (!cgroup) {
698 /* this is just /cgroup/controller, return its contents */
699 cgroup = "/";
700 }
7f163b71
SH
701 }
702
2c51f8dd 703 if (cgroup && !fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
7f163b71 704 return -EACCES;
2c51f8dd 705 }
c688e1b3
SH
706
707 /* we'll free this at cg_releasedir */
2c51f8dd
SH
708 dir_info = malloc(sizeof(*dir_info));
709 if (!dir_info)
710 return -ENOMEM;
bae07053
SH
711 dir_info->controller = must_copy_string(dir_info, controller);
712 dir_info->cgroup = must_copy_string(dir_info, cgroup);
443d13f5 713 dir_info->type = LXC_TYPE_CGDIR;
c688e1b3 714 dir_info->buf = NULL;
8f6e8f5e 715 dir_info->file = NULL;
c688e1b3
SH
716 dir_info->buflen = 0;
717
718 fi->fh = (unsigned long)dir_info;
758ad80c
SH
719 return 0;
720}
721
758ad80c
SH
722static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
723 struct fuse_file_info *fi)
724{
c688e1b3 725 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
726 struct cgm_keys **list = NULL;
727 int i, ret;
728 char *nextcg = NULL;
758ad80c 729 struct fuse_context *fc = fuse_get_context();
2c51f8dd 730 char **clist = NULL;
758ad80c 731
443d13f5 732 if (d->type != LXC_TYPE_CGDIR) {
b845ad01
SH
733 fprintf(stderr, "Internal error: file cache info used in readdir\n");
734 return -EIO;
735 }
c688e1b3
SH
736 if (!d->cgroup && !d->controller) {
737 // ls /var/lib/lxcfs/cgroup - just show list of controllers
758ad80c
SH
738 char **list = LXCFS_DATA ? LXCFS_DATA->subsystems : NULL;
739 int i;
740
741 if (!list)
742 return -EIO;
7f163b71 743
758ad80c
SH
744 for (i = 0; list[i]; i++) {
745 if (filler(buf, list[i], NULL, 0) != 0) {
746 return -EIO;
747 }
748 }
749 return 0;
750 }
751
2c51f8dd 752 if (!cgm_list_keys(d->controller, d->cgroup, &list)) {
3db25a35 753 // not a valid cgroup
2c51f8dd
SH
754 ret = -EINVAL;
755 goto out;
756 }
3db25a35 757
c688e1b3 758 if (!caller_is_in_ancestor(fc->pid, d->controller, d->cgroup, &nextcg)) {
3db25a35
SH
759 if (nextcg) {
760 int ret;
761 ret = filler(buf, nextcg, NULL, 0);
2c51f8dd
SH
762 free(nextcg);
763 if (ret != 0) {
764 ret = -EIO;
765 goto out;
766 }
3db25a35 767 }
2c51f8dd
SH
768 ret = 0;
769 goto out;
3db25a35
SH
770 }
771
758ad80c 772 for (i = 0; list[i]; i++) {
758ad80c 773 if (filler(buf, list[i]->name, NULL, 0) != 0) {
2c51f8dd
SH
774 ret = -EIO;
775 goto out;
758ad80c
SH
776 }
777 }
778
779 // now get the list of child cgroups
758ad80c 780
2c51f8dd
SH
781 if (!cgm_list_children(d->controller, d->cgroup, &clist)) {
782 ret = 0;
783 goto out;
784 }
758ad80c 785 for (i = 0; clist[i]; i++) {
758ad80c 786 if (filler(buf, clist[i], NULL, 0) != 0) {
2c51f8dd
SH
787 ret = -EIO;
788 goto out;
758ad80c
SH
789 }
790 }
2c51f8dd
SH
791 ret = 0;
792
793out:
794 free_keys(list);
795 if (clist) {
796 for (i = 0; clist[i]; i++)
797 free(clist[i]);
798 free(clist);
799 }
800 return ret;
758ad80c
SH
801}
802
8f6e8f5e
SH
803static void do_release_file_info(struct file_info *f)
804{
2c51f8dd
SH
805 if (!f)
806 return;
807 free(f->controller);
808 free(f->cgroup);
809 free(f->file);
810 free(f->buf);
811 free(f);
8f6e8f5e
SH
812}
813
758ad80c
SH
814static int cg_releasedir(const char *path, struct fuse_file_info *fi)
815{
c688e1b3
SH
816 struct file_info *d = (struct file_info *)fi->fh;
817
8f6e8f5e 818 do_release_file_info(d);
758ad80c
SH
819 return 0;
820}
821
99978832
SH
822static int cg_open(const char *path, struct fuse_file_info *fi)
823{
99978832 824 const char *cgroup;
2c51f8dd
SH
825 char *fpath = NULL, *path1, *path2, * cgdir = NULL, *controller;
826 struct cgm_keys *k = NULL;
8f6e8f5e 827 struct file_info *file_info;
99978832 828 struct fuse_context *fc = fuse_get_context();
2c51f8dd 829 int ret;
99978832
SH
830
831 if (!fc)
832 return -EIO;
833
834 controller = pick_controller_from_path(fc, path);
835 if (!controller)
836 return -EIO;
837 cgroup = find_cgroup_in_path(path);
838 if (!cgroup)
839 return -EINVAL;
840
841 get_cgdir_and_path(cgroup, &cgdir, &fpath);
842 if (!fpath) {
843 path1 = "/";
844 path2 = cgdir;
845 } else {
846 path1 = cgdir;
847 path2 = fpath;
848 }
849
8f6e8f5e 850 k = get_cgroup_key(controller, path1, path2);
2c51f8dd
SH
851 if (!k) {
852 ret = -EINVAL;
853 goto out;
854 }
855 free_key(k);
99978832 856
2c51f8dd 857 if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
8f6e8f5e 858 // should never get here
2c51f8dd
SH
859 ret = -EACCES;
860 goto out;
861 }
99978832 862
8f6e8f5e 863 /* we'll free this at cg_release */
2c51f8dd
SH
864 file_info = malloc(sizeof(*file_info));
865 if (!file_info) {
866 ret = -ENOMEM;
867 goto out;
868 }
bae07053
SH
869 file_info->controller = must_copy_string(file_info, controller);
870 file_info->cgroup = must_copy_string(file_info, path1);
871 file_info->file = must_copy_string(file_info, path2);
443d13f5 872 file_info->type = LXC_TYPE_CGFILE;
8f6e8f5e
SH
873 file_info->buf = NULL;
874 file_info->buflen = 0;
875
876 fi->fh = (unsigned long)file_info;
2c51f8dd
SH
877 ret = 0;
878
879out:
880 free(cgdir);
881 return ret;
8f6e8f5e
SH
882}
883
884static int cg_release(const char *path, struct fuse_file_info *fi)
885{
886 struct file_info *f = (struct file_info *)fi->fh;
887
888 do_release_file_info(f);
889 return 0;
99978832
SH
890}
891
a05660a6
SH
/*
 * Receive up to len bytes from sockfd into buf, waiting at most two
 * seconds for the socket to become readable.
 * Returns the recv() result, or -1 if select() fails or times out.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	struct timeval timeout = { .tv_sec = 2, .tv_usec = 0 };
	fd_set readable;

	FD_ZERO(&readable);
	FD_SET(sockfd, &readable);

	if (select(sockfd + 1, &readable, NULL, NULL, &timeout) <= 0)
		return -1;

	/* data is pending, so a non-blocking receive is enough */
	return recv(sockfd, buf, len, MSG_DONTWAIT);
}
906
01e71852
SH
#define SEND_CREDS_OK 0
#define SEND_CREDS_NOTSK 1
#define SEND_CREDS_FAIL 2
/*
 * Send one byte v over sock with *cred attached as SCM_CREDENTIALS
 * ancillary data.
 *
 * If pingfirst is true, first wait (via msgrecv) for a one-byte message
 * from the peer before sending.
 *
 * Returns SEND_CREDS_OK on success, SEND_CREDS_NOTSK if sendmsg() failed
 * with ESRCH, and SEND_CREDS_FAIL on any other failure. After a sendmsg()
 * failure errno holds the sendmsg() error.
 */
static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
{
	struct msghdr msg = { 0 };
	struct iovec iov;
	struct cmsghdr *cmsg;
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1];
	buf[0] = 'p';

	if (pingfirst) {
		if (msgrecv(sock, buf, 1) != 1) {
			fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
				  __func__);
			return SEND_CREDS_FAIL;
		}
	}

	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_CREDENTIALS;
	memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));

	msg.msg_name = NULL;
	msg.msg_namelen = 0;

	buf[0] = v;
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (sendmsg(sock, &msg, 0) < 0) {
		/* fprintf/strerror may change errno: capture it first */
		int saved_errno = errno;

		fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
			 strerror(saved_errno));
		errno = saved_errno;
		if (saved_errno == ESRCH)
			return SEND_CREDS_NOTSK;
		return SEND_CREDS_FAIL;
	}

	return SEND_CREDS_OK;
}
955
/*
 * Receive one byte plus SCM_CREDENTIALS from sock.
 *
 * Enables SO_PASSCRED, writes a single '1' byte to the peer, then waits
 * up to two seconds for the reply.
 *
 * *v is preset to '1' and *cred to all -1; on success *v holds the byte
 * received, and *cred is overwritten only if the first control message
 * is a well-formed SCM_CREDENTIALS one.
 * Returns true if a message was received, false on any failure.
 */
static bool recv_creds(int sock, struct ucred *cred, char *v)
{
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1];
	struct msghdr msg = { 0 };
	struct iovec iov;
	struct cmsghdr *cmsg;
	struct timeval tv;
	fd_set rfds;
	int optval = 1;
	int ret;

	*v = '1';

	cred->pid = -1;
	cred->uid = -1;
	cred->gid = -1;

	if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
		fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
		return false;
	}

	/* tell the peer we are ready for the next message */
	buf[0] = '1';
	if (write(sock, buf, 1) != 1) {
		fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
		return false;
	}

	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	FD_ZERO(&rfds);
	FD_SET(sock, &rfds);
	tv.tv_sec = 2;
	tv.tv_usec = 0;
	if (select(sock + 1, &rfds, NULL, NULL, &tv) <= 0) {
		fprintf(stderr, "Failed to select for scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	ret = recvmsg(sock, &msg, MSG_DONTWAIT);
	if (ret < 0) {
		fprintf(stderr, "Failed to receive scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg != NULL &&
	    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
	    cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_CREDENTIALS)
		memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));

	*v = buf[0];

	return true;
}
1021
1022
/*
 * pid_to_ns - reads pids from a ucred over a socket, then writes the
 * int value back over the socket. This shifts the pid from the
 * sender's pidns into tpid's pidns.
 *
 * Never returns: exits 0 when a message carrying '1' arrives or
 * recv_creds() fails, and 1 when the reply cannot be written.
 * The tpid argument is not used here.
 */
static void pid_to_ns(int sock, pid_t tpid)
{
	struct ucred cred;
	char v = '0';

	for (;;) {
		if (!recv_creds(sock, &cred, &v))
			break;
		if (v == '1')
			_exit(0);
		if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
			_exit(1);
	}
	_exit(0);
}
1041
/*
 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
 * in your old pidns. Only children which you fork will be in the target
 * pidns. So the pid_to_ns_wrapper does the setns, then forks a child to
 * actually convert pids.
 *
 * The child acknowledges its start by writing '1' to a pipe; if no
 * acknowledgement arrives within one second the child is killed and a
 * new one is forked.
 *
 * Never returns: exits 0 if the converting child exited successfully,
 * 1 on any setup failure or if the child failed.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	struct timeval tv;
	fd_set s;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

	for (;;) {
		cpid = fork();
		if (cpid < 0)
			_exit(1);

		if (!cpid) {
			char b = '1';
			close(cpipe[0]);
			if (write(cpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(cpipe[1]);
			/* does not return */
			pid_to_ns(sock, tpid);
		}

		// give the child 1 second to be done forking and
		// write its ack
		FD_ZERO(&s);
		FD_SET(cpipe[0], &s);
		tv.tv_sec = 1;
		tv.tv_usec = 0;
		ret = select(cpipe[0] + 1, &s, NULL, NULL, &tv);
		if (ret > 0) {
			ret = read(cpipe[0], &v, 1);
			if (ret == sizeof(char) && v == '1')
				break;
		}

		/* no (valid) ack: get rid of this child and try again */
		kill(cpid, SIGKILL);
		wait_for_pid(cpid);
	}

	/* wait_for_pid() returns 0 on success, so non-zero means failure */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);
}
1108
1109/*
1110 * To read cgroup files with a particular pid, we will setns into the child
1111 * pidns, open a pipe, fork a child - which will be the first to really be in
1112 * the child ns - which does the cgm_get_value and writes the data to the pipe.
1113 */
1114static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
1115{
1116 int sock[2] = {-1, -1};
2c51f8dd 1117 char *tmpdata = NULL;
a05660a6
SH
1118 int ret;
1119 pid_t qpid, cpid = -1;
1120 bool answer = false;
1121 char v = '0';
1122 struct ucred cred;
1123 struct timeval tv;
2c51f8dd 1124 size_t sz = 0, asz = 0;
a05660a6
SH
1125 fd_set s;
1126
1127 if (!cgm_get_value(contrl, cg, file, &tmpdata))
1128 return false;
1129
1130 /*
1131 * Now we read the pids from returned data one by one, pass
1132 * them into a child in the target namespace, read back the
1133 * translated pids, and put them into our to-return data
1134 */
1135
1136 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1137 perror("socketpair");
2c51f8dd
SH
1138 free(tmpdata);
1139 return false;
a05660a6
SH
1140 }
1141
1142 cpid = fork();
1143 if (cpid == -1)
1144 goto out;
1145
1146 if (!cpid) // child
4775fba1 1147 pid_to_ns_wrapper(sock[1], tpid);
a05660a6
SH
1148
1149 char *ptr = tmpdata;
1150 cred.uid = 0;
1151 cred.gid = 0;
1152 while (sscanf(ptr, "%d\n", &qpid) == 1) {
1153 cred.pid = qpid;
01e71852
SH
1154 ret = send_creds(sock[0], &cred, v, true);
1155
1156 if (ret == SEND_CREDS_NOTSK)
1157 goto next;
1158 if (ret == SEND_CREDS_FAIL)
a05660a6
SH
1159 goto out;
1160
1161 // read converted results
1162 FD_ZERO(&s);
1163 FD_SET(sock[0], &s);
6ee867dc 1164 tv.tv_sec = 2;
a05660a6
SH
1165 tv.tv_usec = 0;
1166 ret = select(sock[0]+1, &s, NULL, NULL, &tv);
1167 if (ret <= 0) {
6ee867dc
SH
1168 fprintf(stderr, "%s: select error waiting for pid from child: %s\n",
1169 __func__, strerror(errno));
a05660a6
SH
1170 goto out;
1171 }
1172 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1173 fprintf(stderr, "%s: error reading pid from child: %s\n",
1174 __func__, strerror(errno));
a05660a6
SH
1175 goto out;
1176 }
2c51f8dd 1177 must_strcat_pid(d, &sz, &asz, qpid);
01e71852 1178next:
a05660a6
SH
1179 ptr = strchr(ptr, '\n');
1180 if (!ptr)
1181 break;
1182 ptr++;
1183 }
1184
1185 cred.pid = getpid();
1186 v = '1';
01e71852 1187 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
a05660a6 1188 // failed to ask child to exit
6ee867dc
SH
1189 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
1190 __func__, strerror(errno));
a05660a6
SH
1191 goto out;
1192 }
1193
1194 answer = true;
1195
1196out:
2c51f8dd 1197 free(tmpdata);
a05660a6
SH
1198 if (cpid != -1)
1199 wait_for_pid(cpid);
1200 if (sock[0] != -1) {
1201 close(sock[0]);
1202 close(sock[1]);
1203 }
1204 return answer;
1205}
1206
99978832
SH
1207static int cg_read(const char *path, char *buf, size_t size, off_t offset,
1208 struct fuse_file_info *fi)
1209{
99978832 1210 struct fuse_context *fc = fuse_get_context();
8f6e8f5e 1211 struct file_info *f = (struct file_info *)fi->fh;
2c51f8dd
SH
1212 struct cgm_keys *k = NULL;
1213 char *data = NULL;
1214 int ret, s;
1215 bool r;
99978832 1216
443d13f5 1217 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1218 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
1219 return -EIO;
1220 }
1221
99978832 1222 if (offset)
7253e0a4 1223 return 0;
99978832
SH
1224
1225 if (!fc)
1226 return -EIO;
1227
8f6e8f5e 1228 if (!f->controller)
99978832
SH
1229 return -EINVAL;
1230
2c51f8dd
SH
1231 if ((k = get_cgroup_key(f->controller, f->cgroup, f->file)) == NULL) {
1232 return -EINVAL;
1233 }
1234 free_key(k);
99978832 1235
99978832 1236
2c51f8dd
SH
1237 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { // should never get here
1238 ret = -EACCES;
1239 goto out;
1240 }
a05660a6 1241
2c51f8dd
SH
1242 if (strcmp(f->file, "tasks") == 0 ||
1243 strcmp(f->file, "/tasks") == 0 ||
1244 strcmp(f->file, "/cgroup.procs") == 0 ||
1245 strcmp(f->file, "cgroup.procs") == 0)
1246 // special case - we have to translate the pids
1247 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
1248 else
1249 r = cgm_get_value(f->controller, f->cgroup, f->file, &data);
99978832 1250
2c51f8dd
SH
1251 if (!r) {
1252 ret = -EINVAL;
1253 goto out;
1254 }
99978832 1255
2c51f8dd
SH
1256 if (!data) {
1257 ret = 0;
1258 goto out;
99978832 1259 }
2c51f8dd
SH
1260 s = strlen(data);
1261 if (s > size)
1262 s = size;
1263 memcpy(buf, data, s);
1264 if (s > 0 && s < size && data[s-1] != '\n')
1265 buf[s++] = '\n';
99978832 1266
2c51f8dd
SH
1267 ret = s;
1268
1269out:
1270 free(data);
1271 return ret;
99978832
SH
1272}
1273
4775fba1
SH
1274static void pid_from_ns(int sock, pid_t tpid)
1275{
1276 pid_t vpid;
1277 struct ucred cred;
1278 char v;
6ee867dc
SH
1279 struct timeval tv;
1280 fd_set s;
1281 int ret;
4775fba1
SH
1282
1283 cred.uid = 0;
1284 cred.gid = 0;
6ee867dc
SH
1285 while (1) {
1286 FD_ZERO(&s);
1287 FD_SET(sock, &s);
1288 tv.tv_sec = 2;
1289 tv.tv_usec = 0;
1290 ret = select(sock+1, &s, NULL, NULL, &tv);
ea56f722
SH
1291 if (ret <= 0) {
1292 fprintf(stderr, "%s: bad select before read from parent: %s\n",
6ee867dc 1293 __func__, strerror(errno));
67bd113f 1294 _exit(1);
6ee867dc
SH
1295 }
1296 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1297 fprintf(stderr, "%s: bad read from parent: %s\n",
1298 __func__, strerror(errno));
67bd113f 1299 _exit(1);
6ee867dc 1300 }
4775fba1 1301 if (vpid == -1) // done
01e71852 1302 break;
4775fba1
SH
1303 v = '0';
1304 cred.pid = vpid;
01e71852 1305 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
4775fba1
SH
1306 v = '1';
1307 cred.pid = getpid();
01e71852 1308 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
67bd113f 1309 _exit(1);
4775fba1
SH
1310 }
1311 }
67bd113f 1312 _exit(0);
4775fba1
SH
1313}
1314
/*
 * Enter the pid namespace of @tpid and run pid_from_ns() in a child
 * forked from there.
 *
 * The child acknowledges a successful start by writing '1' to a pipe.
 * If no acknowledgement arrives within one second the child is killed
 * and a new one is forked.  Once a child has acknowledged, this process
 * waits for it and exits with 0 if wait_for_pid() reports success and
 * 1 otherwise.
 *
 * This function never returns.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	char nspath[100];
	int ackpipe[2];
	int nsfd = -1, rc;

	rc = snprintf(nspath, sizeof(nspath), "/proc/%d/ns/pid", tpid);
	if (rc < 0 || rc >= sizeof(nspath))
		_exit(1);

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0)
		_exit(1);
	if (setns(nsfd, 0) < 0)
		_exit(1);
	close(nsfd);

	if (pipe(ackpipe) < 0)
		_exit(1);

	for (;;) {
		struct timeval timeout;
		fd_set rfds;
		char ack;
		pid_t child;

		child = fork();
		if (child < 0)
			_exit(1);

		if (child == 0) {
			char token = '1';

			close(ackpipe[0]);
			if (write(ackpipe[1], &token, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(ackpipe[1]);
			pid_from_ns(sock, tpid);
		}

		// give the child 1 second to be done forking and
		// write its ack
		FD_ZERO(&rfds);
		FD_SET(ackpipe[0], &rfds);
		timeout.tv_sec = 1;
		timeout.tv_usec = 0;

		rc = select(ackpipe[0] + 1, &rfds, NULL, NULL, &timeout);
		if (rc > 0) {
			rc = read(ackpipe[0], &ack, 1);
			if (rc == sizeof(char) && ack == '1') {
				if (!wait_for_pid(child))
					_exit(1);
				_exit(0);
			}
		}

		/* No valid ack: dispose of this child and fork another. */
		kill(child, SIGKILL);
		wait_for_pid(child);
	}
}
1377
/*
 * Move the pids listed in @buf into cgroup @cg of controller @contrl.
 *
 * A helper is forked and run through pid_from_ns_wrapper(@tpid).  Each
 * pid parsed from @buf is written to the helper over a socketpair; the
 * helper answers with a credential, and when the answer is tagged '0'
 * the pid carried in that credential is passed to cgm_move_pid().
 * Finally -1 is written to tell the helper to exit.
 *
 * @file is not used here.
 *
 * Returns true only if the loop completed and no cgm_move_pid() call
 * failed.  A socketpair() or fork() failure returns false instead of
 * terminating the calling process.
 */
static bool do_write_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, const char *buf)
{
	int sock[2] = {-1, -1};
	pid_t qpid, cpid = -1;
	bool answer = false, fail = false;

	/*
	 * write the pids to a socket, have helper in writer's pidns
	 * call movepid for us
	 */
	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		perror("socketpair");
		/* Fail this request only; do not take the whole daemon down. */
		return false;
	}

	cpid = fork();
	if (cpid == -1)
		goto out;

	if (!cpid) // child
		pid_from_ns_wrapper(sock[1], tpid);

	const char *ptr = buf;
	while (sscanf(ptr, "%d", &qpid) == 1) {
		struct ucred cred;
		char v;

		if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
			fprintf(stderr, "%s: error writing pid to child: %s\n",
				__func__, strerror(errno));
			goto out;
		}

		if (recv_creds(sock[0], &cred, &v)) {
			/* '0' marks a credential that carries the requested pid. */
			if (v == '0') {
				if (!cgm_move_pid(contrl, cg, cred.pid))
					fail = true;
			}
		}

		/* Advance to the next line of the input, if any. */
		ptr = strchr(ptr, '\n');
		if (!ptr)
			break;
		ptr++;
	}

	/* All pids handled; -1 tells the helper to exit. */
	qpid = -1;
	if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid))
		fprintf(stderr, "Warning: failed to ask child to exit\n");

	if (!fail)
		answer = true;

out:
	if (cpid != -1)
		wait_for_pid(cpid);
	if (sock[0] != -1) {
		close(sock[0]);
		close(sock[1]);
	}
	return answer;
}
1441
2ad6d2bd
SH
1442int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1443 struct fuse_file_info *fi)
1444{
2ad6d2bd 1445 struct fuse_context *fc = fuse_get_context();
2c51f8dd
SH
1446 char *localbuf = NULL;
1447 struct cgm_keys *k = NULL;
8f6e8f5e 1448 struct file_info *f = (struct file_info *)fi->fh;
2c51f8dd 1449 bool r;
2ad6d2bd 1450
443d13f5 1451 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1452 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
1453 return -EIO;
1454 }
1455
2ad6d2bd 1456 if (offset)
7253e0a4 1457 return 0;
2ad6d2bd
SH
1458
1459 if (!fc)
1460 return -EIO;
1461
2c51f8dd 1462 localbuf = alloca(size+1);
47cbf0e5
SH
1463 localbuf[size] = '\0';
1464 memcpy(localbuf, buf, size);
2ad6d2bd 1465
2c51f8dd
SH
1466 if ((k = get_cgroup_key(f->controller, f->cgroup, f->file)) == NULL) {
1467 size = -EINVAL;
1468 goto out;
1469 }
2ad6d2bd 1470
2c51f8dd
SH
1471 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) {
1472 size = -EACCES;
1473 goto out;
1474 }
4775fba1 1475
2c51f8dd
SH
1476 if (strcmp(f->file, "tasks") == 0 ||
1477 strcmp(f->file, "/tasks") == 0 ||
1478 strcmp(f->file, "/cgroup.procs") == 0 ||
1479 strcmp(f->file, "cgroup.procs") == 0)
1480 // special case - we have to translate the pids
1481 r = do_write_pids(fc->pid, f->controller, f->cgroup, f->file, localbuf);
1482 else
1483 r = cgm_set_value(f->controller, f->cgroup, f->file, localbuf);
2ad6d2bd 1484
2c51f8dd
SH
1485 if (!r)
1486 size = -EINVAL;
2ad6d2bd 1487
2c51f8dd
SH
1488out:
1489 free_key(k);
1490 return size;
2ad6d2bd
SH
1491}
1492
341b21ad
SH
1493int cg_chown(const char *path, uid_t uid, gid_t gid)
1494{
1495 struct fuse_context *fc = fuse_get_context();
2c51f8dd
SH
1496 char *cgdir = NULL, *fpath = NULL, *path1, *path2, *controller;
1497 struct cgm_keys *k = NULL;
341b21ad 1498 const char *cgroup;
2c51f8dd 1499 int ret;
341b21ad
SH
1500
1501 if (!fc)
1502 return -EIO;
1503
1504 if (strcmp(path, "/cgroup") == 0)
1505 return -EINVAL;
1506
1507 controller = pick_controller_from_path(fc, path);
1508 if (!controller)
f9a05025 1509 return -EINVAL;
341b21ad
SH
1510 cgroup = find_cgroup_in_path(path);
1511 if (!cgroup)
1512 /* this is just /cgroup/controller */
1513 return -EINVAL;
1514
1515 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1516
1517 if (!fpath) {
1518 path1 = "/";
1519 path2 = cgdir;
1520 } else {
1521 path1 = cgdir;
1522 path2 = fpath;
1523 }
1524
1525 if (is_child_cgroup(controller, path1, path2)) {
1526 // get uid, gid, from '/tasks' file and make up a mode
1527 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1528 k = get_cgroup_key(controller, cgroup, "tasks");
1529
1530 } else
1531 k = get_cgroup_key(controller, path1, path2);
1532
2c51f8dd
SH
1533 if (!k) {
1534 ret = -EINVAL;
1535 goto out;
1536 }
341b21ad
SH
1537
1538 /*
1539 * This being a fuse request, the uid and gid must be valid
1540 * in the caller's namespace. So we can just check to make
1541 * sure that the caller is root in his uid, and privileged
1542 * over the file's current owner.
1543 */
2c51f8dd
SH
1544 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) {
1545 ret = -EACCES;
1546 goto out;
1547 }
341b21ad 1548
2c51f8dd
SH
1549 if (!cgm_chown_file(controller, cgroup, uid, gid)) {
1550 ret = -EINVAL;
1551 goto out;
1552 }
1553
1554 ret = 0;
1555
1556out:
1557 free_key(k);
1558 free(cgdir);
1559
1560 return ret;
341b21ad 1561}
2ad6d2bd 1562
fd2e4e03
SH
1563int cg_chmod(const char *path, mode_t mode)
1564{
0a1bb5ea 1565 struct fuse_context *fc = fuse_get_context();
2c51f8dd
SH
1566 char * cgdir = NULL, *fpath = NULL, *path1, *path2, *controller;
1567 struct cgm_keys *k = NULL;
0a1bb5ea 1568 const char *cgroup;
2c51f8dd 1569 int ret;
0a1bb5ea
SH
1570
1571 if (!fc)
1572 return -EIO;
1573
1574 if (strcmp(path, "/cgroup") == 0)
1575 return -EINVAL;
1576
1577 controller = pick_controller_from_path(fc, path);
1578 if (!controller)
f9a05025 1579 return -EINVAL;
0a1bb5ea
SH
1580 cgroup = find_cgroup_in_path(path);
1581 if (!cgroup)
1582 /* this is just /cgroup/controller */
1583 return -EINVAL;
1584
1585 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1586
1587 if (!fpath) {
1588 path1 = "/";
1589 path2 = cgdir;
1590 } else {
1591 path1 = cgdir;
1592 path2 = fpath;
1593 }
1594
1595 if (is_child_cgroup(controller, path1, path2)) {
1596 // get uid, gid, from '/tasks' file and make up a mode
1597 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1598 k = get_cgroup_key(controller, cgroup, "tasks");
1599
1600 } else
1601 k = get_cgroup_key(controller, path1, path2);
1602
2c51f8dd
SH
1603 if (!k) {
1604 ret = -EINVAL;
1605 goto out;
1606 }
0a1bb5ea
SH
1607
1608 /*
1609 * This being a fuse request, the uid and gid must be valid
1610 * in the caller's namespace. So we can just check to make
1611 * sure that the caller is root in his uid, and privileged
1612 * over the file's current owner.
1613 */
2c51f8dd
SH
1614 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
1615 ret = -EPERM;
1616 goto out;
1617 }
0a1bb5ea 1618
2c51f8dd
SH
1619 if (!cgm_chmod_file(controller, cgroup, mode)) {
1620 ret = -EINVAL;
1621 goto out;
1622 }
1623
1624 ret = 0;
1625out:
1626 free_key(k);
1627 free(cgdir);
1628 return ret;
fd2e4e03
SH
1629}
1630
3e13a059
SH
1631#define LXCFS_MKDIR_PATH LIBEXECDIR "/lxcfs/lxcfs_mkdir"
1632
ab54b798
SH
1633int cg_mkdir(const char *path, mode_t mode)
1634{
1635 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1636 char *fpath = NULL, *path1, *cgdir = NULL, *controller;
ab54b798 1637 const char *cgroup;
2c51f8dd 1638 int ret;
ab54b798 1639
ab54b798
SH
1640 if (!fc)
1641 return -EIO;
1642
1643
1644 controller = pick_controller_from_path(fc, path);
1645 if (!controller)
f9a05025 1646 return -EINVAL;
ab54b798
SH
1647
1648 cgroup = find_cgroup_in_path(path);
1649 if (!cgroup)
f9a05025 1650 return -EINVAL;
ab54b798
SH
1651
1652 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1653 if (!fpath)
1654 path1 = "/";
1655 else
1656 path1 = cgdir;
1657
2c51f8dd
SH
1658 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
1659 ret = -EACCES;
1660 goto out;
1661 }
1662 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
1663 ret = -EACCES;
1664 goto out;
1665 }
ab54b798 1666
2c51f8dd
SH
1667 if (fc->uid == 0 && fc->gid == 0) {
1668 if (!cgm_create(controller, cgroup)) {
1669 ret = -EINVAL;
1670 goto out;
1671 }
1672 } else {
1673 /*
40b8c791 1674 * exec a helper so as to get a clean dbus connection
2c51f8dd
SH
1675 * 17 for lxcfs_mkdir, and spaces and newline and \0. 50 for two ints.
1676 * 50 for two ints
1677 */
1678 size_t len = strlen(cgroup) + strlen(controller) + 17 + 50;
1679 char *cmd = alloca(len);
3e13a059 1680 ret = snprintf(cmd, len, "%s %d %d %s %s\n", LXCFS_MKDIR_PATH,
2c51f8dd
SH
1681 fc->uid, fc->gid, controller, cgroup);
1682 if (ret < 0 || ret >= len) {
1683 ret = -EINVAL;
1684 goto out;
1685 }
1686 ret = system(cmd);
1687 if (ret != 0)
1688 goto out;
1689 }
ab54b798 1690
2c51f8dd 1691 ret = 0;
ab54b798 1692
2c51f8dd
SH
1693out:
1694 free(cgdir);
1695 return ret;
ab54b798
SH
1696}
1697
50d8d5b5
SH
1698static int cg_rmdir(const char *path)
1699{
1700 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1701 char *fpath = NULL, *cgdir = NULL, *controller;
50d8d5b5 1702 const char *cgroup;
2c51f8dd 1703 int ret;
50d8d5b5
SH
1704
1705 if (!fc)
1706 return -EIO;
1707
50d8d5b5
SH
1708 controller = pick_controller_from_path(fc, path);
1709 if (!controller)
f9a05025 1710 return -EINVAL;
50d8d5b5
SH
1711
1712 cgroup = find_cgroup_in_path(path);
1713 if (!cgroup)
f9a05025 1714 return -EINVAL;
50d8d5b5
SH
1715
1716 get_cgdir_and_path(cgroup, &cgdir, &fpath);
2c51f8dd
SH
1717 if (!fpath) {
1718 ret = -EINVAL;
1719 goto out;
1720 }
50d8d5b5 1721
2c51f8dd
SH
1722 fprintf(stderr, "rmdir: verifying access to %s:%s (req path %s)\n",
1723 controller, cgdir, path);
1724 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
1725 ret = -EACCES;
1726 goto out;
1727 }
1728 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
1729 ret = -EACCES;
1730 goto out;
1731 }
50d8d5b5 1732
2c51f8dd
SH
1733 if (!cgm_remove(controller, cgroup)) {
1734 ret = -EINVAL;
1735 goto out;
1736 }
50d8d5b5 1737
2c51f8dd
SH
1738 ret = 0;
1739
1740out:
1741 free(cgdir);
1742 return ret;
50d8d5b5
SH
1743}
1744
2dc17609
SH
/* Return true when @line begins with the string @pref. */
static bool startswith(const char *line, const char *pref)
{
	return strncmp(line, pref, strlen(pref)) == 0;
}
1751
/*
 * Scan @memstat line by line for the first line beginning with
 * "total_cache", parse the unsigned number following that prefix and
 * store it, divided by 1024, in *@v.  *@v is 0 when no such line
 * exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	static const char key[] = "total_cache";
	const size_t keylen = sizeof(key) - 1;
	char *cur = memstat;

	*v = 0;
	while (*cur) {
		char *nl;

		if (strncmp(cur, key, keylen) == 0) {
			sscanf(cur + keylen, "%lu", v);
			*v /= 1024;
			return;
		}

		nl = strchr(cur, '\n');
		if (!nl)
			return;
		cur = nl + 1;
	}
}
1769
/*
 * Scan @str line by line for the first line beginning with
 * "<major>:<minor> <iotype>" and store the unsigned number that
 * follows that prefix in *@v.  *@v is 0 when no line matches.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32] = {0};
	size_t keylen;
	char *pos = str;

	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);
	*v = 0;

	while (*pos) {
		if (strncmp(pos, key, keylen) == 0) {
			sscanf(pos + keylen, "%lu", v);
			return;
		}

		pos = strchr(pos, '\n');
		if (!pos)
			return;
		pos++;
	}
}
1792
53b43826
SH
1793static int read_file(const char *path, char *buf, size_t size,
1794 struct file_info *d)
1795{
1796 size_t linelen = 0, total_len = 0, rv = 0;
1797 char *line = NULL;
1798 char *cache = d->buf;
1799 size_t cache_size = d->buflen;
1800 FILE *f = fopen(path, "r");
1801 if (!f)
1802 return 0;
1803
1804 while (getline(&line, &linelen, f) != -1) {
1805 size_t l = snprintf(cache, cache_size, "%s", line);
1806 if (l < 0) {
1807 perror("Error writing to cache");
1808 rv = 0;
1809 goto err;
1810 }
1811 if (l >= cache_size) {
1812 fprintf(stderr, "Internal error: truncated write to cache\n");
1813 rv = 0;
1814 goto err;
1815 }
1816 if (l < cache_size) {
1817 cache += l;
1818 cache_size -= l;
1819 total_len += l;
1820 } else {
1821 cache += cache_size;
1822 total_len += cache_size;
1823 cache_size = 0;
1824 break;
1825 }
1826 }
1827
1828 d->size = total_len;
1829 if (total_len > size ) total_len = size;
1830
1831 /* read from off 0 */
1832 memcpy(buf, d->buf, total_len);
1833 rv = total_len;
1834 err:
1835 fclose(f);
1836 free(line);
1837 return rv;
1838}
1839
758ad80c 1840/*
2ad6d2bd 1841 * FUSE ops for /proc
758ad80c 1842 */
758ad80c 1843
23ce2127
SH
1844static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1845 struct fuse_file_info *fi)
1846{
2dc17609 1847 struct fuse_context *fc = fuse_get_context();
97f1f27b 1848 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
1849 char *cg;
1850 char *memlimit_str = NULL, *memusage_str = NULL, *memstat_str = NULL;
2dc17609
SH
1851 unsigned long memlimit = 0, memusage = 0, cached = 0, hosttotal = 0;
1852 char *line = NULL;
e1068397 1853 size_t linelen = 0, total_len = 0, rv = 0;
97f1f27b
YY
1854 char *cache = d->buf;
1855 size_t cache_size = d->buflen;
2c51f8dd 1856 FILE *f = NULL;
2dc17609 1857
97f1f27b
YY
1858 if (offset){
1859 if (offset > d->size)
1860 return -EINVAL;
1861 int left = d->size - offset;
1862 total_len = left > size ? size: left;
1863 memcpy(buf, cache + offset, total_len);
1864 return total_len;
1865 }
2dc17609 1866
2c51f8dd 1867 cg = get_pid_cgroup(fc->pid, "memory");
2dc17609 1868 if (!cg)
53b43826 1869 return read_file("/proc/meminfo", buf, size, d);
2dc17609
SH
1870
1871 if (!cgm_get_value("memory", cg, "memory.limit_in_bytes", &memlimit_str))
2c51f8dd 1872 goto err;
2dc17609 1873 if (!cgm_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
2c51f8dd 1874 goto err;
2dc17609 1875 if (!cgm_get_value("memory", cg, "memory.stat", &memstat_str))
2c51f8dd 1876 goto err;
2dc17609
SH
1877 memlimit = strtoul(memlimit_str, NULL, 10);
1878 memusage = strtoul(memusage_str, NULL, 10);
1879 memlimit /= 1024;
1880 memusage /= 1024;
1881 get_mem_cached(memstat_str, &cached);
1882
1883 f = fopen("/proc/meminfo", "r");
1884 if (!f)
2c51f8dd 1885 goto err;
2dc17609
SH
1886
1887 while (getline(&line, &linelen, f) != -1) {
1888 size_t l;
1889 char *printme, lbuf[100];
1890
1891 memset(lbuf, 0, 100);
1892 if (startswith(line, "MemTotal:")) {
1893 sscanf(line+14, "%lu", &hosttotal);
1894 if (hosttotal < memlimit)
1895 memlimit = hosttotal;
1896 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
1897 printme = lbuf;
1898 } else if (startswith(line, "MemFree:")) {
1899 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
1900 printme = lbuf;
1901 } else if (startswith(line, "MemAvailable:")) {
1902 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
1903 printme = lbuf;
1904 } else if (startswith(line, "Buffers:")) {
1905 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
1906 printme = lbuf;
1907 } else if (startswith(line, "Cached:")) {
1908 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
1909 printme = lbuf;
1910 } else if (startswith(line, "SwapCached:")) {
1911 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
1912 printme = lbuf;
1913 } else
1914 printme = line;
97f1f27b
YY
1915
1916 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
1917 if (l < 0) {
1918 perror("Error writing to cache");
1919 rv = 0;
1920 goto err;
1921
1922 }
1923 if (l >= cache_size) {
1924 fprintf(stderr, "Internal error: truncated write to cache\n");
1925 rv = 0;
1926 goto err;
1927 }
1928
97f1f27b
YY
1929 cache += l;
1930 cache_size -= l;
2f919d9d 1931 total_len += l;
2dc17609
SH
1932 }
1933
97f1f27b
YY
1934 d->size = total_len;
1935 if (total_len > size ) total_len = size;
1936 memcpy(buf, d->buf, total_len);
1937
e1068397 1938 rv = total_len;
2c51f8dd
SH
1939err:
1940 if (f)
1941 fclose(f);
92c84dc4 1942 free(line);
2c51f8dd
SH
1943 free(cg);
1944 free(memlimit_str);
1945 free(memusage_str);
1946 free(memstat_str);
e1068397 1947 return rv;
23ce2127
SH
1948}
1949
/*
 * Fetch the "cpuset.cpus" value of cgroup @cg in the cpuset controller.
 * The result is a newly allocated string which the caller must free,
 * or NULL if the value could not be read.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgm_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
1962
bool cpu_in_cpuset(int cpu, const char *cpuset);

/*
 * Parse the cpu number out of a "processor : N" line and report
 * whether cpu_in_cpuset() accepts it for @cpuset.  Lines that do not
 * match that pattern yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;
	bool parsed = sscanf(line, "processor : %d", &cpu) == 1;

	return parsed ? cpu_in_cpuset(cpu, cpuset) : false;
}
1973
23ce2127
SH
/*
 * Check whether @line is a "processor : N" line of /proc/cpuinfo,
 * i.e. whether a cpu number can be parsed from it.
 */
static bool is_processor_line(const char *line)
{
	int ignored;

	return sscanf(line, "processor : %d", &ignored) == 1;
}
1985
23ce2127
SH
1986static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
1987 struct fuse_file_info *fi)
1988{
1989 struct fuse_context *fc = fuse_get_context();
97f1f27b 1990 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
1991 char *cg;
1992 char *cpuset = NULL;
23ce2127 1993 char *line = NULL;
e1068397 1994 size_t linelen = 0, total_len = 0, rv = 0;
23ce2127
SH
1995 bool am_printing = false;
1996 int curcpu = -1;
97f1f27b
YY
1997 char *cache = d->buf;
1998 size_t cache_size = d->buflen;
2c51f8dd 1999 FILE *f = NULL;
23ce2127 2000
97f1f27b
YY
2001 if (offset){
2002 if (offset > d->size)
2003 return -EINVAL;
2004 int left = d->size - offset;
2005 total_len = left > size ? size: left;
2006 memcpy(buf, cache + offset, total_len);
2f919d9d 2007 return total_len;
97f1f27b 2008 }
23ce2127 2009
2c51f8dd 2010 cg = get_pid_cgroup(fc->pid, "cpuset");
23ce2127 2011 if (!cg)
53b43826 2012 return read_file("proc/cpuinfo", buf, size, d);
23ce2127
SH
2013
2014 cpuset = get_cpuset(cg);
2015 if (!cpuset)
2c51f8dd 2016 goto err;
23ce2127
SH
2017
2018 f = fopen("/proc/cpuinfo", "r");
2019 if (!f)
2c51f8dd 2020 goto err;
23ce2127
SH
2021
2022 while (getline(&line, &linelen, f) != -1) {
2023 size_t l;
2024 if (is_processor_line(line)) {
aeb56147 2025 am_printing = cpuline_in_cpuset(line, cpuset);
23ce2127
SH
2026 if (am_printing) {
2027 curcpu ++;
97f1f27b 2028 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
e1068397
MM
2029 if (l < 0) {
2030 perror("Error writing to cache");
2031 rv = 0;
2032 goto err;
2033 }
2034 if (l >= cache_size) {
2035 fprintf(stderr, "Internal error: truncated write to cache\n");
2036 rv = 0;
2037 goto err;
2038 }
97f1f27b
YY
2039 if (l < cache_size){
2040 cache += l;
2041 cache_size -= l;
2042 total_len += l;
2043 }else{
2044 cache += cache_size;
2045 total_len += cache_size;
2046 cache_size = 0;
2047 break;
2048 }
23ce2127
SH
2049 }
2050 continue;
2051 }
2052 if (am_printing) {
97f1f27b 2053 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2054 if (l < 0) {
2055 perror("Error writing to cache");
2056 rv = 0;
2057 goto err;
2058 }
2059 if (l >= cache_size) {
2060 fprintf(stderr, "Internal error: truncated write to cache\n");
2061 rv = 0;
2062 goto err;
2063 }
97f1f27b
YY
2064 if (l < cache_size) {
2065 cache += l;
2066 cache_size -= l;
2067 total_len += l;
2068 } else {
2069 cache += cache_size;
2070 total_len += cache_size;
2071 cache_size = 0;
2072 break;
2073 }
23ce2127
SH
2074 }
2075 }
2076
97f1f27b
YY
2077 d->size = total_len;
2078 if (total_len > size ) total_len = size;
2079
2080 /* read from off 0 */
2081 memcpy(buf, d->buf, total_len);
e1068397 2082 rv = total_len;
2c51f8dd
SH
2083err:
2084 if (f)
2085 fclose(f);
92c84dc4 2086 free(line);
2c51f8dd
SH
2087 free(cpuset);
2088 free(cg);
e1068397 2089 return rv;
23ce2127
SH
2090}
2091
2092static int proc_stat_read(char *buf, size_t size, off_t offset,
2093 struct fuse_file_info *fi)
2094{
aeb56147 2095 struct fuse_context *fc = fuse_get_context();
97f1f27b 2096 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2097 char *cg;
2098 char *cpuset = NULL;
aeb56147 2099 char *line = NULL;
e1068397 2100 size_t linelen = 0, total_len = 0, rv = 0;
2a0fde62 2101 int curcpu = -1; /* cpu numbering starts at 0 */
97f1f27b
YY
2102 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
2103 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
2104 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
2105#define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2106 char cpuall[CPUALL_MAX_SIZE];
2107 /* reserve for cpu all */
2108 char *cache = d->buf + CPUALL_MAX_SIZE;
2109 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
2c51f8dd 2110 FILE *f = NULL;
aeb56147 2111
97f1f27b
YY
2112 if (offset){
2113 if (offset > d->size)
2114 return -EINVAL;
2115 int left = d->size - offset;
2116 total_len = left > size ? size: left;
2117 memcpy(buf, d->buf + offset, total_len);
2f919d9d 2118 return total_len;
97f1f27b 2119 }
aeb56147 2120
2c51f8dd 2121 cg = get_pid_cgroup(fc->pid, "cpuset");
aeb56147 2122 if (!cg)
53b43826 2123 return read_file("/proc/stat", buf, size, d);
aeb56147
SH
2124
2125 cpuset = get_cpuset(cg);
2126 if (!cpuset)
2c51f8dd 2127 goto err;
aeb56147
SH
2128
2129 f = fopen("/proc/stat", "r");
2130 if (!f)
2c51f8dd 2131 goto err;
aeb56147 2132
97f1f27b
YY
2133 //skip first line
2134 if (getline(&line, &linelen, f) < 0) {
2135 fprintf(stderr, "proc_stat_read read first line failed\n");
2c51f8dd 2136 goto err;
97f1f27b
YY
2137 }
2138
aeb56147
SH
2139 while (getline(&line, &linelen, f) != -1) {
2140 size_t l;
2141 int cpu;
2a0fde62 2142 char cpu_char[10]; /* That's a lot of cores */
aeb56147
SH
2143 char *c;
2144
2a0fde62
CB
2145 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
2146 /* not a ^cpuN line containing a number N, just print it */
97f1f27b 2147 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2148 if (l < 0) {
2149 perror("Error writing to cache");
2150 rv = 0;
2151 goto err;
2152 }
2153 if (l >= cache_size) {
2154 fprintf(stderr, "Internal error: truncated write to cache\n");
2155 rv = 0;
2156 goto err;
2157 }
2158 if (l < cache_size) {
97f1f27b
YY
2159 cache += l;
2160 cache_size -= l;
2161 total_len += l;
2162 continue;
e1068397 2163 } else {
97f1f27b
YY
2164 //no more space, break it
2165 cache += cache_size;
2166 total_len += cache_size;
2167 cache_size = 0;
2168 break;
2169 }
aeb56147 2170 }
2a0fde62
CB
2171
2172 if (sscanf(cpu_char, "%d", &cpu) != 1)
2173 continue;
aeb56147
SH
2174 if (!cpu_in_cpuset(cpu, cpuset))
2175 continue;
2176 curcpu ++;
2177
2178 c = strchr(line, ' ');
2179 if (!c)
2180 continue;
25c5e8fb 2181 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
e1068397
MM
2182 if (l < 0) {
2183 perror("Error writing to cache");
2184 rv = 0;
2185 goto err;
2186
2187 }
2188 if (l >= cache_size) {
2189 fprintf(stderr, "Internal error: truncated write to cache\n");
2190 rv = 0;
2191 goto err;
2192 }
2193
97f1f27b
YY
2194 cache += l;
2195 cache_size -= l;
aeb56147 2196 total_len += l;
2f919d9d 2197
97f1f27b
YY
2198 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
2199 &softirq, &steal, &guest) != 9)
2200 continue;
2201 user_sum += user;
2202 nice_sum += nice;
2203 system_sum += system;
2204 idle_sum += idle;
2205 iowait_sum += iowait;
2206 irq_sum += irq;
2207 softirq_sum += softirq;
2208 steal_sum += steal;
2f919d9d 2209 guest_sum += guest;
97f1f27b
YY
2210 }
2211
2212 cache = d->buf;
2213
2f919d9d 2214 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
97f1f27b
YY
2215 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
2216 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
2217 memcpy(cache, cpuall, cpuall_len);
2f919d9d 2218 cache += cpuall_len;
2c51f8dd 2219 } else{
97f1f27b
YY
2220 /* shouldn't happen */
2221 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
2222 cpuall_len = 0;
2223 }
2224
2225 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
2226 total_len += cpuall_len;
2227 d->size = total_len;
2228 if (total_len > size ) total_len = size;
2229
2230 memcpy(buf, d->buf, total_len);
e1068397 2231 rv = total_len;
2c51f8dd
SH
2232
2233err:
2234 if (f)
2235 fclose(f);
92c84dc4 2236 free(line);
2c51f8dd
SH
2237 free(cpuset);
2238 free(cg);
e1068397 2239 return rv;
23ce2127
SH
2240}
2241
7bbf2246
SH
2242/*
2243 * How to guess what to present for uptime?
2244 * One thing we could do would be to take the date on the caller's
2245 * memory.usage_in_bytes file, which should equal the time of creation
2246 * of his cgroup. However, a task could be in a sub-cgroup of the
2247 * container. The same problem exists if we try to look at the ages
2248 * of processes in the caller's cgroup.
2249 *
2250 * So we'll fork a task that will enter the caller's pidns, mount a
2251 * fresh procfs, get the age of /proc/1, and pass that back over a pipe.
2252 *
2253 * For the second uptime #, we'll do as Stéphane had done, just copy
2254 * the number from /proc/uptime. Not sure how to best emulate 'idle'
2255 * time. Maybe someone can come up with a good algorithm and submit a
2256 * patch. Maybe something based on cpushare info?
2257 */
41bb9357
SH
2258
/*
 * Return the age of the reaper for @pid, taken from the ctime of its
 * procdir.
 *
 * getreaperage() calls this in a forked child.  The function unshares
 * the mount namespace, enters the pid namespace of @pid and forks
 * again; the new child mounts a fresh procfs and RETURNS the age of
 * /proc/1 to the caller.  The intermediate process never returns: it
 * waits for the child and then _exit()s.  If the child does not
 * acknowledge its start within one second it is killed and re-forked.
 *
 * Returns 0 on any failure before the age could be computed.
 */
static long int get_pid1_time(pid_t pid)
{
	char fnam[100];
	int fd, cpipe[2], ret;
	struct stat sb;
	pid_t cpid;
	struct timeval tv;
	fd_set s;
	char v;

	if (unshare(CLONE_NEWNS))
		return 0;

	if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
		perror("rslave mount failed");
		return 0;
	}

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", pid);
	if (ret < 0 || ret >= sizeof(fnam))
		return 0;

	fd = open(fnam, O_RDONLY);
	if (fd < 0) {
		perror("get_pid1_time open of ns/pid");
		return 0;
	}
	if (setns(fd, 0)) {
		perror("get_pid1_time setns 1");
		close(fd);
		return 0;
	}
	close(fd);

	/*
	 * This runs in a forked child: use _exit() so the parent's atexit
	 * handlers and inherited stdio buffers are not run or flushed here.
	 */
	if (pipe(cpipe) < 0)
		_exit(1);

loop:
	cpid = fork();
	if (cpid < 0)
		return 0;

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		/* Replace /proc with one reflecting the new pid namespace. */
		umount2("/proc", MNT_DETACH);
		if (mount("proc", "/proc", "proc", 0, NULL)) {
			perror("get_pid1_time mount");
			return 0;
		}
		ret = lstat("/proc/1", &sb);
		if (ret) {
			perror("get_pid1_time lstat");
			return 0;
		}
		return time(NULL) - sb.st_ctime;
	}

	// give the child 1 second to be done forking and
	// write its ack
	FD_ZERO(&s);
	FD_SET(cpipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
	if (ret <= 0)
		goto again;
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1') {
		goto again;
	}

	wait_for_pid(cpid);
	_exit(0);

again:
	kill(cpid, SIGKILL);
	wait_for_pid(cpid);
	goto loop;
}
2345
/*
 * Return the value computed by get_pid1_time(@qpid).
 *
 * get_pid1_time() is run in a forked child, which sends its result
 * back over a pipe.  The parent waits up to one second for it.
 *
 * Returns 0 if the pipe or fork fails, the wait times out, or the
 * result cannot be read.
 */
static long int getreaperage(pid_t qpid)
{
	int pid, mypipe[2], ret;
	struct timeval tv;
	fd_set s;
	long int mtime, answer = 0;

	if (pipe(mypipe)) {
		return 0;
	}

	pid = fork();
	if (pid < 0) {
		/* No child exists: clean up and bail out, never wait on -1. */
		perror("fork");
		close(mypipe[0]);
		close(mypipe[1]);
		return 0;
	}

	if (!pid) { // child
		mtime = get_pid1_time(qpid);
		if (write(mypipe[1], &mtime, sizeof(mtime)) != sizeof(mtime))
			fprintf(stderr, "Warning: bad write from getreaperage\n");
		_exit(0);
	}

	close(mypipe[1]);
	FD_ZERO(&s);
	FD_SET(mypipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(mypipe[0]+1, &s, NULL, NULL, &tv);
	if (ret < 0) {
		perror("select");
		goto out;
	}
	/* select() returns 0 on timeout; errno is not meaningful then. */
	if (!ret) {
		fprintf(stderr, "timed out\n");
		goto out;
	}
	if (read(mypipe[0], &mtime, sizeof(mtime)) != sizeof(mtime)) {
		perror("read");
		goto out;
	}
	answer = mtime;

out:
	wait_for_pid(pid);
	close(mypipe[0]);
	return answer;
}
2391
/*
 * Read /proc/uptime and return the integer part of its second field.
 *
 * Both fields must parse as "<number>.<number>"; if the file cannot be
 * opened or fewer than four numbers are converted, 0 is returned.
 */
static unsigned long int getprocidle(void)
{
	unsigned long int up_whole, up_frac, idle_whole, idle_frac;
	int converted;
	FILE *uptime = fopen("/proc/uptime", "r");

	if (uptime == NULL)
		return 0;

	converted = fscanf(uptime, "%lu.%02lu %lu.%02lu",
			&up_whole, &up_frac, &idle_whole, &idle_frac);
	fclose(uptime);

	/* only the whole seconds of the second field are reported */
	return converted == 4 ? idle_whole : 0;
}
2407
2408/*
2409 * We read /proc/uptime and reuse its second field.
2410 * For the first field, we use the mtime for the reaper for
2411 * the calling pid as returned by getreaperage
2412 */
23ce2127
SH
2413static int proc_uptime_read(char *buf, size_t size, off_t offset,
2414 struct fuse_file_info *fi)
2415{
41bb9357 2416 struct fuse_context *fc = fuse_get_context();
97f1f27b 2417 struct file_info *d = (struct file_info *)fi->fh;
41bb9357 2418 long int reaperage = getreaperage(fc->pid);;
38056ebc 2419 unsigned long int idletime = getprocidle();
97f1f27b 2420 size_t total_len = 0;
41bb9357 2421
97f1f27b
YY
2422 if (offset){
2423 if (offset > d->size)
2424 return -EINVAL;
2425 return 0;
2426 }
2427
38056ebc 2428 total_len = snprintf(buf, size, "%ld.0 %lu.0\n", reaperage, idletime);
e1068397
MM
2429 if (total_len < 0){
2430 perror("Error writing to cache");
2431 return 0;
2432 }
cdcdb29b
MM
2433 if (total_len >= size){
2434 d->size = size;
2435 return size;
2436 }
2437
97f1f27b
YY
2438 d->size = total_len;
2439 return total_len;
23ce2127
SH
2440}
2441
49878439
YY
2442static int proc_diskstats_read(char *buf, size_t size, off_t offset,
2443 struct fuse_file_info *fi)
2444{
2445 char dev_name[72];
2446 struct fuse_context *fc = fuse_get_context();
97f1f27b 2447 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2448 char *cg;
2449 char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
49878439
YY
2450 *io_wait_time_str = NULL, *io_service_time_str = NULL;
2451 unsigned long read = 0, write = 0;
2452 unsigned long read_merged = 0, write_merged = 0;
2453 unsigned long read_sectors = 0, write_sectors = 0;
2454 unsigned long read_ticks = 0, write_ticks = 0;
2455 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
2456 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
2457 char *line = NULL;
e1068397 2458 size_t linelen = 0, total_len = 0, rv = 0;
49878439
YY
2459 unsigned int major = 0, minor = 0;
2460 int i = 0;
2c51f8dd 2461 FILE *f = NULL;
49878439 2462
97f1f27b
YY
2463 if (offset){
2464 if (offset > d->size)
2465 return -EINVAL;
2466 return 0;
2467 }
49878439 2468
2c51f8dd 2469 cg = get_pid_cgroup(fc->pid, "blkio");
49878439 2470 if (!cg)
53b43826 2471 return read_file("/proc/diskstats", buf, size, d);
49878439
YY
2472
2473 if (!cgm_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
2c51f8dd 2474 goto err;
49878439 2475 if (!cgm_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
2c51f8dd 2476 goto err;
49878439 2477 if (!cgm_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
2c51f8dd 2478 goto err;
49878439 2479 if (!cgm_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
2c51f8dd 2480 goto err;
49878439 2481 if (!cgm_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
2c51f8dd 2482 goto err;
49878439
YY
2483
2484
2485 f = fopen("/proc/diskstats", "r");
2486 if (!f)
2c51f8dd 2487 goto err;
49878439
YY
2488
2489 while (getline(&line, &linelen, f) != -1) {
2490 size_t l;
2491 char *printme, lbuf[256];
2492
c0adec85 2493 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
49878439
YY
2494 if(i == 3){
2495 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
2496 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
2497 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
2498 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
2499 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
2500 read_sectors = read_sectors/512;
2501 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
2502 write_sectors = write_sectors/512;
2f919d9d 2503
49878439
YY
2504 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
2505 rd_svctm = rd_svctm/1000000;
2506 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
2507 rd_wait = rd_wait/1000000;
2508 read_ticks = rd_svctm + rd_wait;
2509
2510 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
2511 wr_svctm = wr_svctm/1000000;
2512 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
2513 wr_wait = wr_wait/1000000;
2514 write_ticks = wr_svctm + wr_wait;
2515
2516 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
2517 tot_ticks = tot_ticks/1000000;
2518 }else{
2519 continue;
2520 }
2521
2522 memset(lbuf, 0, 256);
2523 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
2f919d9d 2524 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
49878439
YY
2525 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
2526 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
2527 printme = lbuf;
2528 } else
2529 continue;
2530
2531 l = snprintf(buf, size, "%s", printme);
e1068397
MM
2532 if (l < 0) {
2533 perror("Error writing to fuse buf");
2534 rv = 0;
2535 goto err;
2536 }
2537 if (l >= size) {
2538 fprintf(stderr, "Internal error: truncated write to cache\n");
2539 rv = 0;
2540 goto err;
2541 }
49878439
YY
2542 buf += l;
2543 size -= l;
2544 total_len += l;
2545 }
2546
97f1f27b 2547 d->size = total_len;
e1068397 2548 rv = total_len;
2c51f8dd
SH
2549err:
2550 free(cg);
2551 if (f)
2552 fclose(f);
49878439 2553 free(line);
2c51f8dd
SH
2554 free(io_serviced_str);
2555 free(io_merged_str);
2556 free(io_service_bytes_str);
2557 free(io_wait_time_str);
2558 free(io_service_time_str);
e1068397 2559 return rv;
49878439
YY
2560}
2561
23ce2127
SH
/*
 * Return the number of bytes obtained by reading the file @which line by
 * line until getline() fails. Returns 0 if the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char *cur = NULL;
	size_t cap = 0;
	ssize_t nread;
	ssize_t total = 0;
	FILE *fp;

	fp = fopen(which, "r");
	if (fp == NULL)
		return 0;

	for (;;) {
		nread = getline(&cur, &cap, fp);
		if (nread == -1)
			break;
		total += nread;
	}

	free(cur);
	fclose(fp);

	return total;
}
2578
758ad80c
SH
/*
 * Fill @sb for a path below /proc.
 *
 * @sb is zeroed, owned by uid/gid 0 and stamped with the current
 * CLOCK_REALTIME for atime, mtime and ctime. "/proc" itself is a 0555
 * directory; the five virtualized files are 0444 regular files of size 0.
 *
 * Returns 0 on success, -EINVAL if the clock cannot be read and -ENOENT
 * for any other path.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const regular_files[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	struct timespec now;
	size_t idx;

	memset(sb, 0, sizeof(struct stat));
	if (clock_gettime(CLOCK_REALTIME, &now) < 0)
		return -EINVAL;

	sb->st_gid = 0;
	sb->st_uid = 0;
	sb->st_ctim = now;
	sb->st_mtim = now;
	sb->st_atim = now;

	if (!strcmp(path, "/proc")) {
		sb->st_mode = S_IFDIR | 00555;
		sb->st_nlink = 2;
		return 0;
	}

	for (idx = 0; idx < sizeof(regular_files) / sizeof(regular_files[0]); idx++) {
		if (strcmp(path, regular_files[idx]) != 0)
			continue;
		sb->st_size = 0;
		sb->st_mode = S_IFREG | 00444;
		sb->st_nlink = 1;
		return 0;
	}

	return -ENOENT;
}
2606
2607static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2608 struct fuse_file_info *fi)
2609{
2610 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2611 filler(buf, "meminfo", NULL, 0) != 0 ||
2612 filler(buf, "stat", NULL, 0) != 0 ||
49878439
YY
2613 filler(buf, "uptime", NULL, 0) != 0 ||
2614 filler(buf, "diskstats", NULL, 0) != 0)
758ad80c 2615 return -EINVAL;
758ad80c
SH
2616 return 0;
2617}
2618
35629743
SH
2619static int proc_open(const char *path, struct fuse_file_info *fi)
2620{
96fc5ee6
SH
2621 int type = -1;
2622 struct file_info *info;
2623
2624 if (strcmp(path, "/proc/meminfo") == 0)
2625 type = LXC_TYPE_PROC_MEMINFO;
2626 else if (strcmp(path, "/proc/cpuinfo") == 0)
2627 type = LXC_TYPE_PROC_CPUINFO;
2628 else if (strcmp(path, "/proc/uptime") == 0)
2629 type = LXC_TYPE_PROC_UPTIME;
2630 else if (strcmp(path, "/proc/stat") == 0)
2631 type = LXC_TYPE_PROC_STAT;
2632 else if (strcmp(path, "/proc/diskstats") == 0)
2633 type = LXC_TYPE_PROC_DISKSTATS;
2634 if (type == -1)
2635 return -ENOENT;
2636
2c51f8dd
SH
2637 info = malloc(sizeof(*info));
2638 if (!info)
2639 return -ENOMEM;
2640
96fc5ee6
SH
2641 memset(info, 0, sizeof(*info));
2642 info->type = type;
2643
97f1f27b 2644 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
2c51f8dd
SH
2645 do {
2646 info->buf = malloc(info->buflen);
2647 } while (!info->buf);
97f1f27b
YY
2648 memset(info->buf, 0, info->buflen);
2649 /* set actual size to buffer size */
2f919d9d 2650 info->size = info->buflen;
97f1f27b 2651
96fc5ee6
SH
2652 fi->fh = (unsigned long)info;
2653 return 0;
2654}
2655
2656static int proc_release(const char *path, struct fuse_file_info *fi)
2657{
2658 struct file_info *f = (struct file_info *)fi->fh;
2659
2660 do_release_file_info(f);
2661 return 0;
35629743
SH
2662}
2663
35629743
SH
2664static int proc_read(const char *path, char *buf, size_t size, off_t offset,
2665 struct fuse_file_info *fi)
2666{
96fc5ee6
SH
2667 struct file_info *f = (struct file_info *) fi->fh;
2668
2669 switch (f->type) {
2f919d9d 2670 case LXC_TYPE_PROC_MEMINFO:
23ce2127 2671 return proc_meminfo_read(buf, size, offset, fi);
96fc5ee6 2672 case LXC_TYPE_PROC_CPUINFO:
23ce2127 2673 return proc_cpuinfo_read(buf, size, offset, fi);
96fc5ee6 2674 case LXC_TYPE_PROC_UPTIME:
23ce2127 2675 return proc_uptime_read(buf, size, offset, fi);
96fc5ee6 2676 case LXC_TYPE_PROC_STAT:
23ce2127 2677 return proc_stat_read(buf, size, offset, fi);
96fc5ee6 2678 case LXC_TYPE_PROC_DISKSTATS:
49878439 2679 return proc_diskstats_read(buf, size, offset, fi);
96fc5ee6
SH
2680 default:
2681 return -EINVAL;
2682 }
35629743
SH
2683}
2684
2ad6d2bd
SH
2685/*
2686 * FUSE ops for /
2687 * these just delegate to the /proc and /cgroup ops as
2688 * needed
2689 */
758ad80c
SH
2690
2691static int lxcfs_getattr(const char *path, struct stat *sb)
2692{
2693 if (strcmp(path, "/") == 0) {
2694 sb->st_mode = S_IFDIR | 00755;
2695 sb->st_nlink = 2;
2696 return 0;
2697 }
2698 if (strncmp(path, "/cgroup", 7) == 0) {
2699 return cg_getattr(path, sb);
2700 }
35629743 2701 if (strncmp(path, "/proc", 5) == 0) {
758ad80c
SH
2702 return proc_getattr(path, sb);
2703 }
2704 return -EINVAL;
2705}
2706
/*
 * opendir for the lxcfs mount: "/" and "/proc" need no per-open state,
 * anything starting with "/cgroup" is delegated to cg_opendir().
 * Returns -ENOENT for every other path.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	if (!strncmp(path, "/cgroup", 7))
		return cg_opendir(path, fi);

	if (!strcmp(path, "/") || !strcmp(path, "/proc"))
		return 0;

	return -ENOENT;
}
2719
2720static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2721 struct fuse_file_info *fi)
2722{
2723 if (strcmp(path, "/") == 0) {
2724 if (filler(buf, "proc", NULL, 0) != 0 ||
2725 filler(buf, "cgroup", NULL, 0) != 0)
2726 return -EINVAL;
2727 return 0;
2728 }
35629743 2729 if (strncmp(path, "/cgroup", 7) == 0)
758ad80c 2730 return cg_readdir(path, buf, filler, offset, fi);
35629743
SH
2731 if (strcmp(path, "/proc") == 0)
2732 return proc_readdir(path, buf, filler, offset, fi);
758ad80c
SH
2733 return -EINVAL;
2734}
2735
/*
 * releasedir for the lxcfs mount: nothing to release for "/" and
 * "/proc"; paths starting with "/cgroup" are delegated to
 * cg_releasedir(). Returns -EINVAL for every other path.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	if (!strncmp(path, "/cgroup", 7))
		return cg_releasedir(path, fi);

	if (!strcmp(path, "/") || !strcmp(path, "/proc"))
		return 0;

	return -EINVAL;
}
2747
99978832
SH
/*
 * open for the lxcfs mount: forwards to cg_open() or proc_open()
 * according to the path prefix, -EINVAL otherwise.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	const int in_cgroup = !strncmp(path, "/cgroup", 7);

	if (in_cgroup)
		return cg_open(path, fi);

	if (!strncmp(path, "/proc", 5))
		return proc_open(path, fi);

	return -EINVAL;
}
2757
/*
 * read for the lxcfs mount: forwards to cg_read() or proc_read()
 * according to the path prefix, -EINVAL otherwise.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	const int in_cgroup = !strncmp(path, "/cgroup", 7);

	if (in_cgroup)
		return cg_read(path, buf, size, offset, fi);

	if (!strncmp(path, "/proc", 5))
		return proc_read(path, buf, size, offset, fi);

	return -EINVAL;
}
2768
2ad6d2bd
SH
/*
 * write for the lxcfs mount: only paths starting with "/cgroup" are
 * writable, through cg_write(). Everything else yields -EINVAL.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
	     struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
2778
99978832
SH
/*
 * flush for the lxcfs mount: nothing is buffered here, so this is a
 * no-op that always returns 0.
 */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
2783
/*
 * release for the lxcfs mount: forwards to cg_release() or
 * proc_release() according to the path prefix, -EINVAL otherwise.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	const int in_cgroup = !strncmp(path, "/cgroup", 7);

	if (in_cgroup)
		return cg_release(path, fi);

	if (!strncmp(path, "/proc", 5))
		return proc_release(path, fi);

	return -EINVAL;
}
2793
/*
 * fsync for the lxcfs mount: there is no backing store to synchronize,
 * so this is a no-op that always returns 0.
 */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
2798
ab54b798
SH
/*
 * mkdir for the lxcfs mount: only allowed below "/cgroup", where it is
 * handled by cg_mkdir(). Everything else yields -EINVAL.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
2806
341b21ad
SH
/*
 * chown for the lxcfs mount: only allowed below "/cgroup", where it is
 * handled by cg_chown(). Everything else yields -EINVAL.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
2814
2ad6d2bd
SH
/*
 * cat truncates a file before it calls ops->write. Truncation has no
 * useful meaning for cgroup files, so for paths starting with "/cgroup"
 * we report success without doing anything. Any other path is rejected
 * with -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	const int in_cgroup = (strncmp(path, "/cgroup", 7) == 0);

	return in_cgroup ? 0 : -EINVAL;
}
2826
50d8d5b5
SH
/*
 * rmdir for the lxcfs mount: only allowed below "/cgroup", where it is
 * handled by cg_rmdir(). Everything else yields -EINVAL.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
2833
fd2e4e03
SH
/*
 * chmod for the lxcfs mount: only allowed below "/cgroup", where it is
 * handled by cg_chmod(). Everything else yields -EINVAL.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
2840
758ad80c
SH
2841const struct fuse_operations lxcfs_ops = {
2842 .getattr = lxcfs_getattr,
2843 .readlink = NULL,
2844 .getdir = NULL,
2845 .mknod = NULL,
ab54b798 2846 .mkdir = lxcfs_mkdir,
758ad80c 2847 .unlink = NULL,
50d8d5b5 2848 .rmdir = lxcfs_rmdir,
758ad80c
SH
2849 .symlink = NULL,
2850 .rename = NULL,
2851 .link = NULL,
fd2e4e03 2852 .chmod = lxcfs_chmod,
341b21ad 2853 .chown = lxcfs_chown,
2ad6d2bd 2854 .truncate = lxcfs_truncate,
758ad80c 2855 .utime = NULL,
99978832
SH
2856
2857 .open = lxcfs_open,
2858 .read = lxcfs_read,
2859 .release = lxcfs_release,
2ad6d2bd 2860 .write = lxcfs_write,
99978832 2861
758ad80c 2862 .statfs = NULL,
99978832
SH
2863 .flush = lxcfs_flush,
2864 .fsync = lxcfs_fsync,
758ad80c
SH
2865
2866 .setxattr = NULL,
2867 .getxattr = NULL,
2868 .listxattr = NULL,
2869 .removexattr = NULL,
2870
2871 .opendir = lxcfs_opendir,
2872 .readdir = lxcfs_readdir,
2873 .releasedir = lxcfs_releasedir,
2874
2875 .fsyncdir = NULL,
2876 .init = NULL,
2877 .destroy = NULL,
2878 .access = NULL,
2879 .create = NULL,
2880 .ftruncate = NULL,
2881 .fgetattr = NULL,
2882};
2883
/*
 * Print the command line synopsis for program name @me to stderr and
 * terminate the process with exit status 1.
 */
static void usage(const char *me)
{
	fputs("Usage:\n", stderr);
	fputs("\n", stderr);
	fprintf(stderr, "%s mountpoint\n", me);
	fprintf(stderr, "%s -h\n", me);

	exit(1);
}
2892
/*
 * Return true if @w is one of the accepted spellings of the help
 * option ("-h", "--help", "-help" or "help"), false otherwise.
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = {
		"-h",
		"--help",
		"-help",
		"help",
	};
	size_t k;

	for (k = 0; k < sizeof(spellings) / sizeof(spellings[0]); k++) {
		if (!strcmp(w, spellings[k]))
			return true;
	}

	return false;
}
2902
0b0f73db
SH
/*
 * Remove the first occurrence of the argument @which from the
 * NULL-terminated vector @argv (argv[0] is never considered) and
 * decrement *@argcp. If @which is not present nothing is changed.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	char **slot = argv + 1;

	/* find the first matching argument, if there is one */
	while (*slot && strcmp(*slot, which) != 0)
		slot++;
	if (!*slot)
		return;

	/* shift the tail, including the NULL terminator, down by one */
	for (; *slot; slot++)
		slot[0] = slot[1];

	*argcp -= 1;
}
2917
/*
 * Remove the first "@opt @v" pair from the NULL-terminated vector @argv
 * (argv[0] is never considered) and reduce *@argcp by two.
 *
 * If @opt is found but is followed by a value other than @v, the
 * unexpected value is reported on stderr and the process exits with
 * status 1. If @opt is absent, or is the last argument, nothing is
 * changed.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	/* an option needs a following value, so stop one before the end */
	for (i = 1; argv[i] && argv[i+1]; i++) {
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i+1], v) != 0) {
			/* report what was actually passed, not what we expected */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i+1]);
			exit(1);
		}
		/* shift the tail, including the NULL terminator, down by two */
		for (; argv[i+1]; i++) {
			argv[i] = argv[i+2];
		}
		(*argcp) -= 2;
		return;
	}
}
2938
758ad80c
SH
2939int main(int argc, char *argv[])
2940{
c0adec85 2941 int ret = -1;
e5d26e0b 2942 struct lxcfs_state *d = NULL;
0b0f73db
SH
2943 /*
2944 * what we pass to fuse_main is:
2945 * argv[0] -s -f -o allow_other,directio argv[1] NULL
2946 */
2c51f8dd
SH
2947 int nargs = 5, cnt = 0;
2948 char *newargv[6];
758ad80c 2949
0b0f73db
SH
2950 /* accomodate older init scripts */
2951 swallow_arg(&argc, argv, "-s");
2952 swallow_arg(&argc, argv, "-f");
2953 swallow_option(&argc, argv, "-o", "allow_other");
2954
2e9c0b32
SH
2955 if (argc == 2 && strcmp(argv[1], "--version") == 0) {
2956 fprintf(stderr, "%s\n", VERSION);
2957 exit(0);
2958 }
0b0f73db 2959 if (argc != 2 || is_help(argv[1]))
758ad80c
SH
2960 usage(argv[0]);
2961
2c51f8dd
SH
2962 do {
2963 d = malloc(sizeof(*d));
2964 } while (!d);
0b0f73db 2965
38a76a91 2966 newargv[cnt++] = argv[0];
38a76a91
SH
2967 newargv[cnt++] = "-f";
2968 newargv[cnt++] = "-o";
2969 newargv[cnt++] = "allow_other,direct_io";
2970 newargv[cnt++] = argv[1];
2971 newargv[cnt++] = NULL;
758ad80c
SH
2972
2973 if (!cgm_escape_cgroup())
2974 fprintf(stderr, "WARNING: failed to escape to root cgroup\n");
2975
2976 if (!cgm_get_controllers(&d->subsystems))
c0adec85 2977 goto out;
758ad80c 2978
38a76a91 2979 ret = fuse_main(nargs, newargv, &lxcfs_ops, d);
2c51f8dd 2980 cgm_dbus_disconnect();
758ad80c 2981
c0adec85 2982out:
e5d26e0b 2983 free(d);
758ad80c 2984 return ret;
2183082c 2985}