]> git.proxmox.com Git - mirror_lxcfs.git/blame - lxcfs.c
Use find_mounted_controller in get_pid_cgroup
[mirror_lxcfs.git] / lxcfs.c
CommitLineData
758ad80c
SH
1/* lxcfs
2 *
2c51f8dd 3 * Copyright © 2014,2015 Canonical, Inc
758ad80c
SH
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
f2799430 6 * See COPYING file for details.
758ad80c
SH
7 */
8
35482f91
SH
9/*
10 * TODO XXX
11 * sanitize paths for '..', cgmanager's not doing that for us any more
12 * does fuse help us?
13 * Surely there are more paths we'll need to sanitize - look back through
14 * cgmanager's sources.
15 */
16
758ad80c
SH
17#define FUSE_USE_VERSION 26
18
2183082c 19#include <stdio.h>
758ad80c
SH
20#include <dirent.h>
21#include <fcntl.h>
22#include <fuse.h>
23#include <unistd.h>
24#include <errno.h>
25#include <stdbool.h>
26#include <time.h>
27#include <string.h>
28#include <stdlib.h>
29#include <libgen.h>
41bb9357
SH
30#include <sched.h>
31#include <linux/sched.h>
a05660a6 32#include <sys/socket.h>
41bb9357
SH
33#include <sys/mount.h>
34#include <wait.h>
758ad80c 35
977ac879 36#ifdef FORTRAVIS
df062bcb
SH
37#define GLIB_DISABLE_DEPRECATION_WARNINGS
38#include <glib-object.h>
977ac879 39#endif
df062bcb 40
35482f91 41#include "cgfs.h"
2e9c0b32 42#include "config.h" // for VERSION
758ad80c 43
443d13f5
SH
/*
 * Tags stored in file_info.type to say what an open handle refers to.
 * The two CG* values are set by cg_opendir/cg_open; the PROC_* values are
 * presumably used by the /proc emulation elsewhere in this file.
 */
enum {
	LXC_TYPE_CGDIR = 0,
	LXC_TYPE_CGFILE,
	LXC_TYPE_PROC_MEMINFO,
	LXC_TYPE_PROC_CPUINFO,
	LXC_TYPE_PROC_UPTIME,
	LXC_TYPE_PROC_STAT,
	LXC_TYPE_PROC_DISKSTATS,
};
53
c688e1b3
SH
/*
 * Per-open-handle state, stashed in fuse_file_info.fh by the open/opendir
 * handlers and released by do_release_file_info().
 */
struct file_info {
	char *controller;	/* heap copy of the controller name, or NULL */
	char *cgroup;		/* heap copy of the cgroup path, or NULL */
	char *file;		/* heap copy of the file name, or NULL */
	int type;		/* one of the LXC_TYPE_* tags */
	char *buf;		/* unused as of yet */
	int buflen;
	int size;		/* actual data size */
	int cached;		/* NOTE(review): meaning not visible here - confirm */
};
64
97f1f27b
YY
/* reserve buffer size, for cpuall in /proc/stat */
#define BUF_RESERVE_SIZE 256

/*
 * Append "<pid>\n" to the heap-allocated string *src, growing it on demand.
 *
 * src: address of the char* to append to. *src may be NULL, in which case
 *      a fresh buffer of BUF_RESERVE_SIZE bytes is allocated.
 * sz:  number of characters stored so far, minus the trailing \0; advanced
 *      by the length of the appended text.
 * asz: the allocated size of *src so far; updated whenever the buffer grows.
 * pid: the pid to append.
 *
 * Allocation is retried until it succeeds ("must" semantics).
 */
static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
{
	char tmp[30];
	char *d = *src;
	size_t tmplen, newasz = 0;
	int n;

	n = snprintf(tmp, sizeof(tmp), "%d\n", (int)pid);
	if (n < 0 || (size_t)n >= sizeof(tmp))
		return;
	tmplen = (size_t)n;

	/*
	 * Compare the *values* behind sz/asz.  Comparing the pointers
	 * themselves would mean the buffer might never be grown.
	 */
	if (!d)
		newasz = BUF_RESERVE_SIZE;
	else if (tmplen + *sz + 1 >= *asz)
		newasz = *asz + BUF_RESERVE_SIZE;

	if (newasz) {
		char *grown;

		/*
		 * Keep the old pointer until realloc succeeds so a failed
		 * attempt neither leaks nor discards what was collected.
		 */
		do {
			grown = realloc(d, newasz);
		} while (!grown);
		d = grown;
		*src = d;
		*asz = newasz;
	}

	memcpy(d + *sz, tmp, tmplen);
	*sz += tmplen;
	d[*sz] = '\0';
}
99
a05660a6
SH
/*
 * Reap child @pid, restarting waitpid() when it is interrupted by a signal.
 * Returns 0 if the child exited normally with status 0, -1 otherwise
 * (including when waitpid itself fails).
 */
static int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;
	return -1;
}
117
053a659d
SH
/*
 * Translate an id through an open /proc/pid/{u,g}id_map stream.
 *
 * idfile: open FILE* on the map; it is rewound before being scanned.
 * in_id:  an id valid in the caller's namespace.
 *
 * Returns the id as seen inside pid's namespace, or -1 (as an unsigned
 * value) if no range covers in_id or a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char line[400];
	unsigned int ns_base,	/* first id of the range inside the namespace */
		     host_base,	/* first id of the range in the caller's namespace */
		     range;	/* number of ids in the range */

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, 400, idfile)) {
		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range) != 3)
			continue;

		/* a wrapping range is not expected from a procfile: bail */
		if (host_base + range < host_base || ns_base + range < ns_base) {
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				ns_base, host_base, range, line);
			return -1;
		}

		if (in_id < host_base || in_id >= host_base + range)
			continue;

		/*
		 * host_base <= in_id < host_base + range and neither sum
		 * wraps, so the offset added to ns_base cannot wrap either.
		 */
		return (in_id - host_base) + ns_base;
	}

	/* no range matched */
	return -1;
}
161
341b21ad
SH
/*
 * Flags for is_privileged_over(): whether the calling uid must be root
 * in its own namespace.
 */
#define NS_ROOT_REQD true
#define NS_ROOT_OPT false

#define PROCLEN 100

/*
 * Decide whether @uid (as used by process @pid) has privilege over @victim.
 *
 * With NS_ROOT_OPT, owning the same uid is enough.  Otherwise the caller
 * must map to uid 0 in pid's user namespace and the victim uid must be
 * mapped there as well.  Any failure to read /proc/<pid>/uid_map yields
 * false.
 */
static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
{
	char fpath[PROCLEN];
	FILE *map;
	bool answer;
	int len;

	if (victim == -1 || uid == -1)
		return false;

	/* e.g. uid 1000 may act on files owned by uid 1000 */
	if (!req_ns_root && uid == victim)
		return true;

	len = snprintf(fpath, PROCLEN, "/proc/%d/uid_map", pid);
	if (len < 0 || len >= PROCLEN)
		return false;

	map = fopen(fpath, "r");
	if (!map)
		return false;

	/*
	 * First: the caller must be root (0) in his namespace.
	 * Second: the victim must be mapped into that namespace.
	 * XXX the second check may be redundant given that fuse sends
	 * requests where the vfs has already converted ids.
	 */
	answer = convert_id_to_ns(map, uid) == 0 &&
		 convert_id_to_ns(map, victim) != -1;

	fclose(map);
	return answer;
}
217
/*
 * Check whether the "other" permission bits of @fmode grant the access
 * requested by the open(2)-style access mode in @req_mode.  Callers shift
 * the owner/group bits down into the "other" position before calling.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
237
3db25a35
SH
/*
 * Return a newly allocated copy of the first path component of @taskcg
 * that lies below @querycg (e.g. taskcg "/a/b/c", querycg "/a" -> "b").
 * Returns NULL if taskcg is not longer than querycg or on allocation
 * failure.  The caller frees the result.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t skip;
	char *next, *slash;

	if (strlen(taskcg) <= strlen(querycg)) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* skip the query prefix and the '/' that follows it */
	skip = strcmp(querycg, "/") == 0 ? 1 : strlen(querycg) + 1;

	next = strdup(taskcg + skip);
	if (!next)
		return NULL;

	slash = strchr(next, '/');
	if (slash)
		*slash = '\0';
	return next;
}
258
2c51f8dd
SH
/* Remove a single trailing '\n' from @x, in place, if there is one. */
static void stripnewline(char *x)
{
	size_t len = strlen(x);

	if (len == 0)
		return;
	if (x[len - 1] == '\n')
		x[len - 1] = '\0';
}
265
266static char *get_pid_cgroup(pid_t pid, const char *contrl)
267{
268 char fnam[PROCLEN];
269 FILE *f;
270 char *answer = NULL;
271 char *line = NULL;
272 size_t len = 0;
273 int ret;
777dd831
SH
274 const char *h = find_mounted_controller(contrl);
275 if (!h)
276 return NULL;
2c51f8dd
SH
277
278 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
279 if (ret < 0 || ret >= PROCLEN)
280 return NULL;
281 if (!(f = fopen(fnam, "r")))
282 return NULL;
283
284 while (getline(&line, &len, f) != -1) {
285 char *c1, *c2;
286 if (!line[0])
287 continue;
288 c1 = strchr(line, ':');
289 if (!c1)
290 goto out;
291 c1++;
292 c2 = strchr(c1, ':');
293 if (!c2)
294 goto out;
295 *c2 = '\0';
777dd831 296 if (strcmp(c1, h) != 0)
2c51f8dd
SH
297 continue;
298 c2++;
299 stripnewline(c2);
300 do {
301 answer = strdup(c2);
302 } while (!answer);
303 break;
304 }
305
306out:
307 fclose(f);
308 free(line);
309 return answer;
310}
311
758ad80c
SH
312/*
313 * check whether a fuse context may access a cgroup dir or file
314 *
315 * If file is not null, it is a cgroup file to check under cg.
316 * If file is null, then we are checking perms on cg itself.
317 *
318 * For files we can check the mode of the list_keys result.
319 * For cgroups, we must make assumptions based on the files under the
320 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
321 * yet.
322 */
323static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
324{
35482f91 325 struct cgfs_files *k = NULL;
2c51f8dd 326 bool ret = false;
758ad80c
SH
327
328 if (!file)
329 file = "tasks";
330
331 if (*file == '/')
332 file++;
333
35482f91
SH
334 k = cgfs_get_key(contrl, cg, file);
335 if (!k)
758ad80c 336 return false;
35482f91
SH
337
338 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
339 if (perms_include(k->mode >> 6, mode)) {
340 ret = true;
2c51f8dd 341 goto out;
758ad80c
SH
342 }
343 }
35482f91
SH
344 if (fc->gid == k->gid) {
345 if (perms_include(k->mode >> 3, mode)) {
346 ret = true;
347 goto out;
348 }
349 }
350 ret = perms_include(k->mode, mode);
758ad80c 351
2c51f8dd 352out:
35482f91 353 free_key(k);
2c51f8dd 354 return ret;
3db25a35
SH
355}
356
04b5cbdc
SH
#define INITSCOPE "/init.scope"
/*
 * Strip a trailing "/init.scope" component from the cgroup path @cg, in
 * place.  If the whole path is "/init.scope" it becomes "/".  Paths that
 * do not end in "/init.scope" (including ones shorter than it) are left
 * untouched.
 */
static void prune_init_slice(char *cg)
{
	size_t cg_len = strlen(cg);
	size_t scope_len = strlen(INITSCOPE);
	char *point;

	/*
	 * Compare lengths rather than pointers: forming cg - n for a short
	 * string would be undefined behaviour.
	 */
	if (cg_len < scope_len)
		return;

	point = cg + (cg_len - scope_len);
	if (strcmp(point, INITSCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';	/* keep the leading '/' */
	else
		*point = '\0';
}
371
3db25a35
SH
372/*
373 * If caller is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
374 * If caller is in /a, he may act on /a/b, but not on /b.
375 * if the answer is false and nextcg is not NULL, then *nextcg will point
2c51f8dd
SH
376 * to a string containing the next cgroup directory under cg, which must be
377 * freed by the caller.
3db25a35
SH
378 */
379static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
380{
2c51f8dd 381 char fnam[PROCLEN];
3db25a35
SH
382 FILE *f;
383 bool answer = false;
384 char *line = NULL;
385 size_t len = 0;
2c51f8dd 386 int ret;
3db25a35 387
2c51f8dd
SH
388 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
389 if (ret < 0 || ret >= PROCLEN)
390 return false;
3db25a35
SH
391 if (!(f = fopen(fnam, "r")))
392 return false;
393
394 while (getline(&line, &len, f) != -1) {
395 char *c1, *c2, *linecmp;
396 if (!line[0])
397 continue;
398 c1 = strchr(line, ':');
399 if (!c1)
400 goto out;
401 c1++;
402 c2 = strchr(c1, ':');
403 if (!c2)
404 goto out;
405 *c2 = '\0';
406 if (strcmp(c1, contrl) != 0)
407 continue;
408 c2++;
409 stripnewline(c2);
04b5cbdc 410 prune_init_slice(c2);
3db25a35
SH
411 /*
412 * callers pass in '/' for root cgroup, otherwise they pass
413 * in a cgroup without leading '/'
414 */
415 linecmp = *cg == '/' ? c2 : c2+1;
416 if (strncmp(linecmp, cg, strlen(linecmp)) != 0) {
417 if (nextcg)
418 *nextcg = get_next_cgroup_dir(linecmp, cg);
419 goto out;
420 }
421 answer = true;
422 goto out;
423 }
424
425out:
426 fclose(f);
427 free(line);
428 return answer;
429}
430
758ad80c 431/*
2c51f8dd
SH
432 * given /cgroup/freezer/a/b, return "freezer".
433 * the returned char* should NOT be freed.
758ad80c
SH
434 */
435static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
436{
437 const char *p1;
2c51f8dd 438 char *contr, *slash;
758ad80c
SH
439
440 if (strlen(path) < 9)
441 return NULL;
ac5d9d48
SH
442 if (*(path+7) != '/')
443 return NULL;
758ad80c 444 p1 = path+8;
2c51f8dd
SH
445 contr = strdupa(p1);
446 if (!contr)
447 return NULL;
448 slash = strstr(contr, "/");
758ad80c
SH
449 if (slash)
450 *slash = '\0';
451
758ad80c 452 int i;
35482f91
SH
453 for (i = 0; i < num_hierarchies; i++) {
454 if (hierarchies[i] && strcmp(hierarchies[i], contr) == 0)
455 return hierarchies[i];
758ad80c 456 }
758ad80c
SH
457 return NULL;
458}
459
/*
 * Find the start of the cgroup in /cgroup/controller/the/cgroup/path.
 * Note that the returned value may include files (keynames) etc.
 * Returns a pointer into @path, or NULL if there is nothing after the
 * controller component.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *slash;

	if (strlen(path) < 9)
		return NULL;

	slash = strchr(path + 8, '/');
	return slash ? slash + 1 : NULL;
}
475
2c51f8dd
SH
/*
 * Split @cg at its last '/'.
 *
 * *dir receives a heap copy of @cg truncated at the last '/' (or the
 * whole string if there is none); it should be freed by the caller.
 * *file points at the last '/' inside @cg itself (so it still carries the
 * leading '/'), or is NULL if @cg has no '/'; it must not be freed.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **file)
{
	char *copy;

	do {
		copy = strdup(cg);
	} while (!copy);
	*dir = copy;

	*file = strrchr(cg, '/');
	if (!*file)
		return;

	*strrchr(copy, '/') = '\0';
}
494
495/*
2ad6d2bd 496 * FUSE ops for /cgroup
758ad80c 497 */
2ad6d2bd 498
758ad80c
SH
499static int cg_getattr(const char *path, struct stat *sb)
500{
501 struct timespec now;
502 struct fuse_context *fc = fuse_get_context();
2c51f8dd 503 char * cgdir = NULL;
758ad80c 504 char *fpath = NULL, *path1, *path2;
35482f91 505 struct cgfs_files *k = NULL;
758ad80c 506 const char *cgroup;
2c51f8dd
SH
507 const char *controller = NULL;
508 int ret = -ENOENT;
758ad80c
SH
509
510
511 if (!fc)
512 return -EIO;
513
514 memset(sb, 0, sizeof(struct stat));
515
516 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
517 return -EINVAL;
518
519 sb->st_uid = sb->st_gid = 0;
520 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
521 sb->st_size = 0;
522
523 if (strcmp(path, "/cgroup") == 0) {
524 sb->st_mode = S_IFDIR | 00755;
525 sb->st_nlink = 2;
526 return 0;
527 }
528
529 controller = pick_controller_from_path(fc, path);
530 if (!controller)
531 return -EIO;
758ad80c
SH
532 cgroup = find_cgroup_in_path(path);
533 if (!cgroup) {
534 /* this is just /cgroup/controller, return it as a dir */
535 sb->st_mode = S_IFDIR | 00755;
536 sb->st_nlink = 2;
537 return 0;
538 }
341b21ad 539
758ad80c
SH
540 get_cgdir_and_path(cgroup, &cgdir, &fpath);
541
542 if (!fpath) {
543 path1 = "/";
544 path2 = cgdir;
545 } else {
546 path1 = cgdir;
547 path2 = fpath;
548 }
549
758ad80c
SH
550 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
551 * Then check that caller's cgroup is under path if fpath is a child
552 * cgroup, or cgdir if fpath is a file */
553
554 if (is_child_cgroup(controller, path1, path2)) {
f9a05025
SH
555 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
556 /* this is just /cgroup/controller, return it as a dir */
557 sb->st_mode = S_IFDIR | 00555;
558 sb->st_nlink = 2;
2c51f8dd
SH
559 ret = 0;
560 goto out;
561 }
562 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
563 ret = -EACCES;
564 goto out;
f9a05025 565 }
758ad80c 566
053a659d
SH
567 // get uid, gid, from '/tasks' file and make up a mode
568 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
569 sb->st_mode = S_IFDIR | 00755;
35482f91 570 k = cgfs_get_key(controller, cgroup, "tasks");
053a659d 571 if (!k) {
053a659d
SH
572 sb->st_uid = sb->st_gid = 0;
573 } else {
053a659d
SH
574 sb->st_uid = k->uid;
575 sb->st_gid = k->gid;
576 }
2c51f8dd 577 free_key(k);
758ad80c 578 sb->st_nlink = 2;
2c51f8dd
SH
579 ret = 0;
580 goto out;
758ad80c
SH
581 }
582
35482f91 583 if ((k = cgfs_get_key(controller, path1, path2)) != NULL) {
758ad80c 584 sb->st_mode = S_IFREG | k->mode;
053a659d 585 sb->st_nlink = 1;
758ad80c
SH
586 sb->st_uid = k->uid;
587 sb->st_gid = k->gid;
7253e0a4 588 sb->st_size = 0;
2c51f8dd 589 free_key(k);
adc3867b
SH
590 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
591 ret = -ENOENT;
592 goto out;
593 }
594 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) {
595 ret = -EACCES;
596 goto out;
597 }
2c51f8dd
SH
598
599 ret = 0;
758ad80c
SH
600 }
601
2c51f8dd
SH
602out:
603 free(cgdir);
604 return ret;
758ad80c 605}
2183082c 606
758ad80c 607static int cg_opendir(const char *path, struct fuse_file_info *fi)
2183082c 608{
7f163b71 609 struct fuse_context *fc = fuse_get_context();
7f163b71 610 const char *cgroup;
c688e1b3 611 struct file_info *dir_info;
2c51f8dd 612 char *controller = NULL;
7f163b71
SH
613
614 if (!fc)
615 return -EIO;
616
c688e1b3
SH
617 if (strcmp(path, "/cgroup") == 0) {
618 cgroup = NULL;
619 controller = NULL;
620 } else {
621 // return list of keys for the controller, and list of child cgroups
622 controller = pick_controller_from_path(fc, path);
623 if (!controller)
624 return -EIO;
7f163b71 625
c688e1b3
SH
626 cgroup = find_cgroup_in_path(path);
627 if (!cgroup) {
628 /* this is just /cgroup/controller, return its contents */
629 cgroup = "/";
630 }
7f163b71
SH
631 }
632
2c51f8dd 633 if (cgroup && !fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
7f163b71 634 return -EACCES;
2c51f8dd 635 }
c688e1b3
SH
636
637 /* we'll free this at cg_releasedir */
2c51f8dd
SH
638 dir_info = malloc(sizeof(*dir_info));
639 if (!dir_info)
640 return -ENOMEM;
35482f91
SH
641 dir_info->controller = must_copy_string(controller);
642 dir_info->cgroup = must_copy_string(cgroup);
443d13f5 643 dir_info->type = LXC_TYPE_CGDIR;
c688e1b3 644 dir_info->buf = NULL;
8f6e8f5e 645 dir_info->file = NULL;
c688e1b3
SH
646 dir_info->buflen = 0;
647
648 fi->fh = (unsigned long)dir_info;
758ad80c
SH
649 return 0;
650}
651
758ad80c
SH
652static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
653 struct fuse_file_info *fi)
654{
c688e1b3 655 struct file_info *d = (struct file_info *)fi->fh;
35482f91 656 struct cgfs_files **list = NULL;
2c51f8dd
SH
657 int i, ret;
658 char *nextcg = NULL;
758ad80c 659 struct fuse_context *fc = fuse_get_context();
2c51f8dd 660 char **clist = NULL;
758ad80c 661
443d13f5 662 if (d->type != LXC_TYPE_CGDIR) {
b845ad01
SH
663 fprintf(stderr, "Internal error: file cache info used in readdir\n");
664 return -EIO;
665 }
c688e1b3
SH
666 if (!d->cgroup && !d->controller) {
667 // ls /var/lib/lxcfs/cgroup - just show list of controllers
758ad80c
SH
668 int i;
669
35482f91
SH
670 for (i = 0; i < num_hierarchies; i++) {
671 if (hierarchies[i] && filler(buf, hierarchies[i], NULL, 0) != 0) {
758ad80c
SH
672 return -EIO;
673 }
674 }
675 return 0;
676 }
677
35482f91 678 if (!cgfs_list_keys(d->controller, d->cgroup, &list)) {
3db25a35 679 // not a valid cgroup
2c51f8dd
SH
680 ret = -EINVAL;
681 goto out;
682 }
3db25a35 683
c688e1b3 684 if (!caller_is_in_ancestor(fc->pid, d->controller, d->cgroup, &nextcg)) {
3db25a35
SH
685 if (nextcg) {
686 int ret;
687 ret = filler(buf, nextcg, NULL, 0);
2c51f8dd
SH
688 free(nextcg);
689 if (ret != 0) {
690 ret = -EIO;
691 goto out;
692 }
3db25a35 693 }
2c51f8dd
SH
694 ret = 0;
695 goto out;
3db25a35
SH
696 }
697
758ad80c 698 for (i = 0; list[i]; i++) {
758ad80c 699 if (filler(buf, list[i]->name, NULL, 0) != 0) {
2c51f8dd
SH
700 ret = -EIO;
701 goto out;
758ad80c
SH
702 }
703 }
704
705 // now get the list of child cgroups
758ad80c 706
35482f91 707 if (!cgfs_list_children(d->controller, d->cgroup, &clist)) {
2c51f8dd
SH
708 ret = 0;
709 goto out;
710 }
758ad80c 711 for (i = 0; clist[i]; i++) {
758ad80c 712 if (filler(buf, clist[i], NULL, 0) != 0) {
2c51f8dd
SH
713 ret = -EIO;
714 goto out;
758ad80c
SH
715 }
716 }
2c51f8dd
SH
717 ret = 0;
718
719out:
720 free_keys(list);
721 if (clist) {
722 for (i = 0; clist[i]; i++)
723 free(clist[i]);
724 free(clist);
725 }
726 return ret;
758ad80c
SH
727}
728
8f6e8f5e
SH
729static void do_release_file_info(struct file_info *f)
730{
2c51f8dd
SH
731 if (!f)
732 return;
733 free(f->controller);
734 free(f->cgroup);
735 free(f->file);
736 free(f->buf);
737 free(f);
8f6e8f5e
SH
738}
739
758ad80c
SH
740static int cg_releasedir(const char *path, struct fuse_file_info *fi)
741{
c688e1b3
SH
742 struct file_info *d = (struct file_info *)fi->fh;
743
8f6e8f5e 744 do_release_file_info(d);
758ad80c
SH
745 return 0;
746}
747
99978832
SH
748static int cg_open(const char *path, struct fuse_file_info *fi)
749{
99978832 750 const char *cgroup;
2c51f8dd 751 char *fpath = NULL, *path1, *path2, * cgdir = NULL, *controller;
35482f91 752 struct cgfs_files *k = NULL;
8f6e8f5e 753 struct file_info *file_info;
99978832 754 struct fuse_context *fc = fuse_get_context();
2c51f8dd 755 int ret;
99978832
SH
756
757 if (!fc)
758 return -EIO;
759
760 controller = pick_controller_from_path(fc, path);
761 if (!controller)
762 return -EIO;
763 cgroup = find_cgroup_in_path(path);
764 if (!cgroup)
765 return -EINVAL;
766
767 get_cgdir_and_path(cgroup, &cgdir, &fpath);
768 if (!fpath) {
769 path1 = "/";
770 path2 = cgdir;
771 } else {
772 path1 = cgdir;
773 path2 = fpath;
774 }
775
35482f91 776 k = cgfs_get_key(controller, path1, path2);
2c51f8dd
SH
777 if (!k) {
778 ret = -EINVAL;
779 goto out;
780 }
781 free_key(k);
99978832 782
2c51f8dd 783 if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
8f6e8f5e 784 // should never get here
2c51f8dd
SH
785 ret = -EACCES;
786 goto out;
787 }
99978832 788
8f6e8f5e 789 /* we'll free this at cg_release */
2c51f8dd
SH
790 file_info = malloc(sizeof(*file_info));
791 if (!file_info) {
792 ret = -ENOMEM;
793 goto out;
794 }
35482f91
SH
795 file_info->controller = must_copy_string(controller);
796 file_info->cgroup = must_copy_string(path1);
797 file_info->file = must_copy_string(path2);
443d13f5 798 file_info->type = LXC_TYPE_CGFILE;
8f6e8f5e
SH
799 file_info->buf = NULL;
800 file_info->buflen = 0;
801
802 fi->fh = (unsigned long)file_info;
2c51f8dd
SH
803 ret = 0;
804
805out:
806 free(cgdir);
807 return ret;
8f6e8f5e
SH
808}
809
810static int cg_release(const char *path, struct fuse_file_info *fi)
811{
812 struct file_info *f = (struct file_info *)fi->fh;
813
814 do_release_file_info(f);
815 return 0;
99978832
SH
816}
817
a05660a6
SH
/*
 * Receive up to @len bytes from @sockfd, waiting at most two seconds for
 * data to become readable.  Returns -1 if select() times out or fails,
 * otherwise whatever the non-blocking recv() returns.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	struct timeval timeout = { .tv_sec = 2, .tv_usec = 0 };
	fd_set readable;

	FD_ZERO(&readable);
	FD_SET(sockfd, &readable);

	if (select(sockfd + 1, &readable, NULL, NULL, &timeout) < 1)
		return -1;

	return recv(sockfd, buf, len, MSG_DONTWAIT);
}
832
01e71852
SH
#define SEND_CREDS_OK 0
#define SEND_CREDS_NOTSK 1
#define SEND_CREDS_FAIL 2
/*
 * Send the one-byte payload @v over @sock with @cred attached as an
 * SCM_CREDENTIALS control message.
 *
 * If @pingfirst is set, first wait (via msgrecv) for a one-byte message
 * from the peer before sending.
 *
 * Returns SEND_CREDS_OK on success, SEND_CREDS_NOTSK if sendmsg() failed
 * with ESRCH, and SEND_CREDS_FAIL for any other failure.
 */
static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
{
	struct msghdr msg = { 0 };
	struct iovec iov;
	struct cmsghdr *cmsg;
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1];
	buf[0] = 'p';

	if (pingfirst) {
		if (msgrecv(sock, buf, 1) != 1) {
			fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
				__func__);
			return SEND_CREDS_FAIL;
		}
	}

	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_CREDENTIALS;
	memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));

	msg.msg_name = NULL;
	msg.msg_namelen = 0;

	buf[0] = v;
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (sendmsg(sock, &msg, 0) < 0) {
		/* save errno: the logging calls below may overwrite it */
		int saved_errno = errno;

		fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
			strerror(saved_errno));
		if (saved_errno == ESRCH)
			return SEND_CREDS_NOTSK;
		return SEND_CREDS_FAIL;
	}

	return SEND_CREDS_OK;
}
881
/*
 * Receive one byte plus SCM_CREDENTIALS from @sock.
 *
 * Enables SO_PASSCRED, writes a single '1' byte to the peer, then waits
 * up to two seconds for the peer's message.  On success the received
 * byte is stored in *v and, if a well-formed credentials control message
 * came with it, *cred is filled in (otherwise its fields stay -1).
 * Returns false on any socket error or timeout.
 */
static bool recv_creds(int sock, struct ucred *cred, char *v)
{
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1] = { '1' };
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;
	struct iovec iov;
	struct timeval tv;
	fd_set rfds;
	int optval = 1;

	*v = '1';

	cred->pid = -1;
	cred->uid = -1;
	cred->gid = -1;

	if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
		fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
		return false;
	}

	/* one byte to the peer before waiting for its reply */
	if (write(sock, buf, 1) != 1) {
		fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
		return false;
	}

	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	FD_ZERO(&rfds);
	FD_SET(sock, &rfds);
	tv.tv_sec = 2;
	tv.tv_usec = 0;
	if (select(sock + 1, &rfds, NULL, NULL, &tv) <= 0) {
		fprintf(stderr, "Failed to select for scm_cred: %s\n",
			strerror(errno));
		return false;
	}

	if (recvmsg(sock, &msg, MSG_DONTWAIT) < 0) {
		fprintf(stderr, "Failed to receive scm_cred: %s\n",
			strerror(errno));
		return false;
	}

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg &&
	    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
	    cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_CREDENTIALS)
		memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));

	*v = buf[0];
	return true;
}
947
948
/*
 * pid_to_ns - reads pids from a ucred over a socket, then writes the
 * int value back over the socket.  This shifts the pid from the
 * sender's pidns into tpid's pidns.
 *
 * Never returns: exits 0 when told to stop (payload byte '1') or when
 * recv_creds() fails, and exits 1 if writing a pid back fails.
 */
static void pid_to_ns(int sock, pid_t tpid)
{
	struct ucred cred;
	char v = '0';

	for (;;) {
		if (!recv_creds(sock, &cred, &v))
			break;
		if (v == '1')
			_exit(0);
		if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
			_exit(1);
	}
	_exit(0);
}
967
/*
 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
 * in your old pidns.  Only children which you fork will be in the target
 * pidns.  So the pid_to_ns_wrapper does the setns, then forks a child to
 * actually convert pids.
 *
 * The forked child acknowledges over a pipe that it is running.  If no
 * ack arrives within one second the child is killed and a new one is
 * forked.  Never returns: exits 0 if the converting child exited
 * successfully, 1 on any setup failure or if the child failed.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	struct timeval tv;
	fd_set s;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

loop:
	cpid = fork();
	if (cpid < 0)
		_exit(1);

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		pid_to_ns(sock, tpid);
	}
	// give the child 1 second to be done forking and
	// write its ack
	FD_ZERO(&s);
	FD_SET(cpipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
	if (ret <= 0)
		goto again;
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1') {
		goto again;
	}

	/* wait_for_pid() returns 0 on success, so non-zero means failure */
	if (wait_for_pid(cpid) != 0)
		_exit(1);
	_exit(0);

again:
	kill(cpid, SIGKILL);
	wait_for_pid(cpid);
	goto loop;
}
1034
1035/*
1036 * To read cgroup files with a particular pid, we will setns into the child
1037 * pidns, open a pipe, fork a child - which will be the first to really be in
35482f91 1038 * the child ns - which does the cgfs_get_value and writes the data to the pipe.
a05660a6
SH
1039 */
1040static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
1041{
1042 int sock[2] = {-1, -1};
2c51f8dd 1043 char *tmpdata = NULL;
a05660a6
SH
1044 int ret;
1045 pid_t qpid, cpid = -1;
1046 bool answer = false;
1047 char v = '0';
1048 struct ucred cred;
1049 struct timeval tv;
2c51f8dd 1050 size_t sz = 0, asz = 0;
a05660a6
SH
1051 fd_set s;
1052
35482f91 1053 if (!cgfs_get_value(contrl, cg, file, &tmpdata))
a05660a6
SH
1054 return false;
1055
1056 /*
1057 * Now we read the pids from returned data one by one, pass
1058 * them into a child in the target namespace, read back the
1059 * translated pids, and put them into our to-return data
1060 */
1061
1062 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1063 perror("socketpair");
2c51f8dd
SH
1064 free(tmpdata);
1065 return false;
a05660a6
SH
1066 }
1067
1068 cpid = fork();
1069 if (cpid == -1)
1070 goto out;
1071
1072 if (!cpid) // child
4775fba1 1073 pid_to_ns_wrapper(sock[1], tpid);
a05660a6
SH
1074
1075 char *ptr = tmpdata;
1076 cred.uid = 0;
1077 cred.gid = 0;
1078 while (sscanf(ptr, "%d\n", &qpid) == 1) {
1079 cred.pid = qpid;
01e71852
SH
1080 ret = send_creds(sock[0], &cred, v, true);
1081
1082 if (ret == SEND_CREDS_NOTSK)
1083 goto next;
1084 if (ret == SEND_CREDS_FAIL)
a05660a6
SH
1085 goto out;
1086
1087 // read converted results
1088 FD_ZERO(&s);
1089 FD_SET(sock[0], &s);
6ee867dc 1090 tv.tv_sec = 2;
a05660a6
SH
1091 tv.tv_usec = 0;
1092 ret = select(sock[0]+1, &s, NULL, NULL, &tv);
1093 if (ret <= 0) {
6ee867dc
SH
1094 fprintf(stderr, "%s: select error waiting for pid from child: %s\n",
1095 __func__, strerror(errno));
a05660a6
SH
1096 goto out;
1097 }
1098 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1099 fprintf(stderr, "%s: error reading pid from child: %s\n",
1100 __func__, strerror(errno));
a05660a6
SH
1101 goto out;
1102 }
2c51f8dd 1103 must_strcat_pid(d, &sz, &asz, qpid);
01e71852 1104next:
a05660a6
SH
1105 ptr = strchr(ptr, '\n');
1106 if (!ptr)
1107 break;
1108 ptr++;
1109 }
1110
1111 cred.pid = getpid();
1112 v = '1';
01e71852 1113 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
a05660a6 1114 // failed to ask child to exit
6ee867dc
SH
1115 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
1116 __func__, strerror(errno));
a05660a6
SH
1117 goto out;
1118 }
1119
1120 answer = true;
1121
1122out:
2c51f8dd 1123 free(tmpdata);
a05660a6
SH
1124 if (cpid != -1)
1125 wait_for_pid(cpid);
1126 if (sock[0] != -1) {
1127 close(sock[0]);
1128 close(sock[1]);
1129 }
1130 return answer;
1131}
1132
99978832
SH
1133static int cg_read(const char *path, char *buf, size_t size, off_t offset,
1134 struct fuse_file_info *fi)
1135{
99978832 1136 struct fuse_context *fc = fuse_get_context();
8f6e8f5e 1137 struct file_info *f = (struct file_info *)fi->fh;
35482f91 1138 struct cgfs_files *k = NULL;
2c51f8dd
SH
1139 char *data = NULL;
1140 int ret, s;
1141 bool r;
99978832 1142
443d13f5 1143 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1144 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
1145 return -EIO;
1146 }
1147
99978832 1148 if (offset)
7253e0a4 1149 return 0;
99978832
SH
1150
1151 if (!fc)
1152 return -EIO;
1153
8f6e8f5e 1154 if (!f->controller)
99978832
SH
1155 return -EINVAL;
1156
35482f91 1157 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
2c51f8dd
SH
1158 return -EINVAL;
1159 }
1160 free_key(k);
99978832 1161
99978832 1162
2c51f8dd
SH
1163 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { // should never get here
1164 ret = -EACCES;
1165 goto out;
1166 }
a05660a6 1167
2c51f8dd
SH
1168 if (strcmp(f->file, "tasks") == 0 ||
1169 strcmp(f->file, "/tasks") == 0 ||
1170 strcmp(f->file, "/cgroup.procs") == 0 ||
1171 strcmp(f->file, "cgroup.procs") == 0)
1172 // special case - we have to translate the pids
1173 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
1174 else
35482f91 1175 r = cgfs_get_value(f->controller, f->cgroup, f->file, &data);
99978832 1176
2c51f8dd
SH
1177 if (!r) {
1178 ret = -EINVAL;
1179 goto out;
1180 }
99978832 1181
2c51f8dd
SH
1182 if (!data) {
1183 ret = 0;
1184 goto out;
99978832 1185 }
2c51f8dd
SH
1186 s = strlen(data);
1187 if (s > size)
1188 s = size;
1189 memcpy(buf, data, s);
1190 if (s > 0 && s < size && data[s-1] != '\n')
1191 buf[s++] = '\n';
99978832 1192
2c51f8dd
SH
1193 ret = s;
1194
1195out:
1196 free(data);
1197 return ret;
99978832
SH
1198}
1199
4775fba1
SH
1200static void pid_from_ns(int sock, pid_t tpid)
1201{
1202 pid_t vpid;
1203 struct ucred cred;
1204 char v;
6ee867dc
SH
1205 struct timeval tv;
1206 fd_set s;
1207 int ret;
4775fba1
SH
1208
1209 cred.uid = 0;
1210 cred.gid = 0;
6ee867dc
SH
1211 while (1) {
1212 FD_ZERO(&s);
1213 FD_SET(sock, &s);
1214 tv.tv_sec = 2;
1215 tv.tv_usec = 0;
1216 ret = select(sock+1, &s, NULL, NULL, &tv);
ea56f722
SH
1217 if (ret <= 0) {
1218 fprintf(stderr, "%s: bad select before read from parent: %s\n",
6ee867dc 1219 __func__, strerror(errno));
67bd113f 1220 _exit(1);
6ee867dc
SH
1221 }
1222 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1223 fprintf(stderr, "%s: bad read from parent: %s\n",
1224 __func__, strerror(errno));
67bd113f 1225 _exit(1);
6ee867dc 1226 }
4775fba1 1227 if (vpid == -1) // done
01e71852 1228 break;
4775fba1
SH
1229 v = '0';
1230 cred.pid = vpid;
01e71852 1231 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
4775fba1
SH
1232 v = '1';
1233 cred.pid = getpid();
01e71852 1234 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
67bd113f 1235 _exit(1);
4775fba1
SH
1236 }
1237 }
67bd113f 1238 _exit(0);
4775fba1
SH
1239}
1240
/*
 * Helper process body: join the pid namespace of tpid via
 * /proc/<tpid>/ns/pid, then fork a child that runs pid_from_ns() on sock.
 * (Presumably the extra fork is needed because setns() on a pid namespace
 * only affects children - confirm.)
 *
 * The child writes a one byte ack ('1') to a pipe right after the fork.  If
 * the ack does not arrive within one second the child is killed, reaped and
 * the fork is retried.
 *
 * Never returns: exits 0 once the acknowledged child has been waited for
 * successfully, 1 on any failure.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	char nspath[100];
	int nsfd = -1, rc, ackpipe[2];
	pid_t child;

	rc = snprintf(nspath, sizeof(nspath), "/proc/%d/ns/pid", tpid);
	if (rc < 0 || rc >= sizeof(nspath))
		_exit(1);
	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0)
		_exit(1);
	if (setns(nsfd, 0) < 0)
		_exit(1);
	close(nsfd);

	if (pipe(ackpipe) < 0)
		_exit(1);

	for (;;) {
		struct timeval timeout;
		fd_set rfds;
		char ack;
		bool acked = false;

		child = fork();
		if (child < 0)
			_exit(1);

		if (child == 0) {
			char b = '1';

			close(ackpipe[0]);
			if (write(ackpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(ackpipe[1]);
			pid_from_ns(sock, tpid);
		}

		// give the child 1 second to be done forking and
		// write its ack
		FD_ZERO(&rfds);
		FD_SET(ackpipe[0], &rfds);
		timeout.tv_sec = 1;
		timeout.tv_usec = 0;
		rc = select(ackpipe[0]+1, &rfds, NULL, NULL, &timeout);
		if (rc > 0) {
			rc = read(ackpipe[0], &ack, 1);
			if (rc == sizeof(char) && ack == '1')
				acked = true;
		}

		if (acked) {
			if (!wait_for_pid(child))
				_exit(1);
			_exit(0);
		}

		/* No ack: get rid of this child and try again. */
		kill(child, SIGKILL);
		wait_for_pid(child);
	}
}
1303
/*
 * Write the pids listed in buf (one per line, as seen by the writer tpid)
 * into the pids file of cgroup cg under controller contrl.
 *
 * Each pid is sent over a socketpair to a forked helper running
 * pid_from_ns_wrapper() for tpid; the credentials that come back are
 * received with recv_creds(), and when the reply is tagged '0' the received
 * cred.pid is written to the pids file.  A final -1 asks the helper to exit.
 *
 * Returns true only when no write to the pids file failed and the file
 * could be closed cleanly.
 */
static bool do_write_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, const char *buf)
{
	int sock[2] = {-1, -1};
	pid_t qpid, cpid = -1;
	FILE *pids_file;
	bool answer = false, fail = false;
	const char *cur;

	pids_file = open_pids_file(contrl, cg);
	if (pids_file == NULL)
		return false;

	/*
	 * write the pids to a socket, have helper in writer's pidns
	 * call movepid for us
	 */
	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		perror("socketpair");
		goto out;
	}

	cpid = fork();
	if (cpid == -1)
		goto out;

	if (cpid == 0) { // child
		fclose(pids_file);
		pid_from_ns_wrapper(sock[1], tpid);
	}

	for (cur = buf; sscanf(cur, "%d", &qpid) == 1; cur++) {
		struct ucred cred;
		char v;

		if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
			fprintf(stderr, "%s: error writing pid to child: %s\n",
				__func__, strerror(errno));
			goto out;
		}

		/* Only replies tagged '0' carry a pid to write. */
		if (recv_creds(sock[0], &cred, &v) && v == '0') {
			if (fprintf(pids_file, "%d", (int) cred.pid) < 0)
				fail = true;
		}

		/* Advance to the next line; the loop increment steps past the '\n'. */
		cur = strchr(cur, '\n');
		if (cur == NULL)
			break;
	}

	/* All good, write the value */
	qpid = -1;
	if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
		fprintf(stderr, "Warning: failed to ask child to exit\n");

	answer = !fail;

out:
	if (cpid != -1)
		wait_for_pid(cpid);
	if (sock[0] != -1) {
		close(sock[0]);
		close(sock[1]);
	}
	/* pids_file is always open here; a failed close counts as failure. */
	if (fclose(pids_file) != 0)
		answer = false;
	return answer;
}
1378
2ad6d2bd
SH
1379int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1380 struct fuse_file_info *fi)
1381{
2ad6d2bd 1382 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1383 char *localbuf = NULL;
35482f91 1384 struct cgfs_files *k = NULL;
8f6e8f5e 1385 struct file_info *f = (struct file_info *)fi->fh;
2c51f8dd 1386 bool r;
2ad6d2bd 1387
443d13f5 1388 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1389 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
1390 return -EIO;
1391 }
1392
2ad6d2bd 1393 if (offset)
7253e0a4 1394 return 0;
2ad6d2bd
SH
1395
1396 if (!fc)
1397 return -EIO;
1398
2c51f8dd 1399 localbuf = alloca(size+1);
47cbf0e5
SH
1400 localbuf[size] = '\0';
1401 memcpy(localbuf, buf, size);
2ad6d2bd 1402
35482f91 1403 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
2c51f8dd
SH
1404 size = -EINVAL;
1405 goto out;
1406 }
2ad6d2bd 1407
2c51f8dd
SH
1408 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) {
1409 size = -EACCES;
1410 goto out;
1411 }
4775fba1 1412
2c51f8dd
SH
1413 if (strcmp(f->file, "tasks") == 0 ||
1414 strcmp(f->file, "/tasks") == 0 ||
1415 strcmp(f->file, "/cgroup.procs") == 0 ||
1416 strcmp(f->file, "cgroup.procs") == 0)
1417 // special case - we have to translate the pids
1418 r = do_write_pids(fc->pid, f->controller, f->cgroup, f->file, localbuf);
1419 else
35482f91 1420 r = cgfs_set_value(f->controller, f->cgroup, f->file, localbuf);
2ad6d2bd 1421
2c51f8dd
SH
1422 if (!r)
1423 size = -EINVAL;
2ad6d2bd 1424
2c51f8dd
SH
1425out:
1426 free_key(k);
1427 return size;
2ad6d2bd
SH
1428}
1429
341b21ad
SH
1430int cg_chown(const char *path, uid_t uid, gid_t gid)
1431{
1432 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1433 char *cgdir = NULL, *fpath = NULL, *path1, *path2, *controller;
35482f91 1434 struct cgfs_files *k = NULL;
341b21ad 1435 const char *cgroup;
2c51f8dd 1436 int ret;
341b21ad
SH
1437
1438 if (!fc)
1439 return -EIO;
1440
1441 if (strcmp(path, "/cgroup") == 0)
1442 return -EINVAL;
1443
1444 controller = pick_controller_from_path(fc, path);
1445 if (!controller)
f9a05025 1446 return -EINVAL;
341b21ad
SH
1447 cgroup = find_cgroup_in_path(path);
1448 if (!cgroup)
1449 /* this is just /cgroup/controller */
1450 return -EINVAL;
1451
1452 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1453
1454 if (!fpath) {
1455 path1 = "/";
1456 path2 = cgdir;
1457 } else {
1458 path1 = cgdir;
1459 path2 = fpath;
1460 }
1461
1462 if (is_child_cgroup(controller, path1, path2)) {
1463 // get uid, gid, from '/tasks' file and make up a mode
1464 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
35482f91 1465 k = cgfs_get_key(controller, cgroup, "tasks");
341b21ad
SH
1466
1467 } else
35482f91 1468 k = cgfs_get_key(controller, path1, path2);
341b21ad 1469
2c51f8dd
SH
1470 if (!k) {
1471 ret = -EINVAL;
1472 goto out;
1473 }
341b21ad
SH
1474
1475 /*
1476 * This being a fuse request, the uid and gid must be valid
1477 * in the caller's namespace. So we can just check to make
1478 * sure that the caller is root in his uid, and privileged
1479 * over the file's current owner.
1480 */
2c51f8dd
SH
1481 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) {
1482 ret = -EACCES;
1483 goto out;
1484 }
341b21ad 1485
35482f91 1486 if (!cgfs_chown_file(controller, cgroup, uid, gid)) {
2c51f8dd
SH
1487 ret = -EINVAL;
1488 goto out;
1489 }
1490
1491 ret = 0;
1492
1493out:
1494 free_key(k);
1495 free(cgdir);
1496
1497 return ret;
341b21ad 1498}
2ad6d2bd 1499
fd2e4e03
SH
1500int cg_chmod(const char *path, mode_t mode)
1501{
0a1bb5ea 1502 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1503 char * cgdir = NULL, *fpath = NULL, *path1, *path2, *controller;
35482f91 1504 struct cgfs_files *k = NULL;
0a1bb5ea 1505 const char *cgroup;
2c51f8dd 1506 int ret;
0a1bb5ea
SH
1507
1508 if (!fc)
1509 return -EIO;
1510
1511 if (strcmp(path, "/cgroup") == 0)
1512 return -EINVAL;
1513
1514 controller = pick_controller_from_path(fc, path);
1515 if (!controller)
f9a05025 1516 return -EINVAL;
0a1bb5ea
SH
1517 cgroup = find_cgroup_in_path(path);
1518 if (!cgroup)
1519 /* this is just /cgroup/controller */
1520 return -EINVAL;
1521
1522 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1523
1524 if (!fpath) {
1525 path1 = "/";
1526 path2 = cgdir;
1527 } else {
1528 path1 = cgdir;
1529 path2 = fpath;
1530 }
1531
1532 if (is_child_cgroup(controller, path1, path2)) {
1533 // get uid, gid, from '/tasks' file and make up a mode
1534 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
35482f91 1535 k = cgfs_get_key(controller, cgroup, "tasks");
0a1bb5ea
SH
1536
1537 } else
35482f91 1538 k = cgfs_get_key(controller, path1, path2);
0a1bb5ea 1539
2c51f8dd
SH
1540 if (!k) {
1541 ret = -EINVAL;
1542 goto out;
1543 }
0a1bb5ea
SH
1544
1545 /*
1546 * This being a fuse request, the uid and gid must be valid
1547 * in the caller's namespace. So we can just check to make
1548 * sure that the caller is root in his uid, and privileged
1549 * over the file's current owner.
1550 */
2c51f8dd
SH
1551 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
1552 ret = -EPERM;
1553 goto out;
1554 }
0a1bb5ea 1555
35482f91 1556 if (!cgfs_chmod_file(controller, cgroup, mode)) {
2c51f8dd
SH
1557 ret = -EINVAL;
1558 goto out;
1559 }
1560
1561 ret = 0;
1562out:
1563 free_key(k);
1564 free(cgdir);
1565 return ret;
fd2e4e03
SH
1566}
1567
ab54b798
SH
1568int cg_mkdir(const char *path, mode_t mode)
1569{
1570 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1571 char *fpath = NULL, *path1, *cgdir = NULL, *controller;
ab54b798 1572 const char *cgroup;
2c51f8dd 1573 int ret;
ab54b798 1574
ab54b798
SH
1575 if (!fc)
1576 return -EIO;
1577
1578
1579 controller = pick_controller_from_path(fc, path);
1580 if (!controller)
f9a05025 1581 return -EINVAL;
ab54b798
SH
1582
1583 cgroup = find_cgroup_in_path(path);
1584 if (!cgroup)
f9a05025 1585 return -EINVAL;
ab54b798
SH
1586
1587 get_cgdir_and_path(cgroup, &cgdir, &fpath);
1588 if (!fpath)
1589 path1 = "/";
1590 else
1591 path1 = cgdir;
1592
2c51f8dd
SH
1593 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
1594 ret = -EACCES;
1595 goto out;
1596 }
1597 if (!caller_is_in_ancestor(fc->pid, controller, path1, NULL)) {
1598 ret = -EACCES;
1599 goto out;
1600 }
ab54b798 1601
2c51f8dd 1602 if (fc->uid == 0 && fc->gid == 0) {
35482f91 1603 if (!cgfs_create(controller, cgroup)) {
2c51f8dd
SH
1604 ret = -EINVAL;
1605 goto out;
1606 }
1607 } else {
35482f91
SH
1608 if (setresuid(fc->uid, fc->gid, 0) < 0) { // bail
1609 fprintf(stderr, "ERROR - DANGER - setresuid failed!\n");
1610 exit(1);
1611 }
1612
1613 bool bret = cgfs_create(controller, cgroup);
1614
1615 if (setresuid(0, 0, 0) < 0) {
1616 fprintf(stderr, "ERROR - failed to restore uids!\n");
1617 exit(1);
1618 }
1619 if (!bret) {
2c51f8dd
SH
1620 ret = -EINVAL;
1621 goto out;
1622 }
2c51f8dd 1623 }
ab54b798 1624
2c51f8dd 1625 ret = 0;
ab54b798 1626
2c51f8dd
SH
1627out:
1628 free(cgdir);
1629 return ret;
ab54b798
SH
1630}
1631
50d8d5b5
SH
1632static int cg_rmdir(const char *path)
1633{
1634 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1635 char *fpath = NULL, *cgdir = NULL, *controller;
50d8d5b5 1636 const char *cgroup;
2c51f8dd 1637 int ret;
50d8d5b5
SH
1638
1639 if (!fc)
1640 return -EIO;
1641
50d8d5b5
SH
1642 controller = pick_controller_from_path(fc, path);
1643 if (!controller)
f9a05025 1644 return -EINVAL;
50d8d5b5
SH
1645
1646 cgroup = find_cgroup_in_path(path);
1647 if (!cgroup)
f9a05025 1648 return -EINVAL;
50d8d5b5
SH
1649
1650 get_cgdir_and_path(cgroup, &cgdir, &fpath);
2c51f8dd
SH
1651 if (!fpath) {
1652 ret = -EINVAL;
1653 goto out;
1654 }
50d8d5b5 1655
2c51f8dd
SH
1656 fprintf(stderr, "rmdir: verifying access to %s:%s (req path %s)\n",
1657 controller, cgdir, path);
1658 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
1659 ret = -EACCES;
1660 goto out;
1661 }
1662 if (!caller_is_in_ancestor(fc->pid, controller, cgroup, NULL)) {
1663 ret = -EACCES;
1664 goto out;
1665 }
50d8d5b5 1666
35482f91 1667 if (!cgfs_remove(controller, cgroup)) {
2c51f8dd
SH
1668 ret = -EINVAL;
1669 goto out;
1670 }
50d8d5b5 1671
2c51f8dd
SH
1672 ret = 0;
1673
1674out:
1675 free(cgdir);
1676 return ret;
50d8d5b5
SH
1677}
1678
2dc17609
SH
/* Return true when line begins with the prefix pref (an empty pref always matches). */
static bool startswith(const char *line, const char *pref)
{
	size_t preflen = strlen(pref);

	return strncmp(line, pref, preflen) == 0;
}
1685
/*
 * Scan the text in memstat line by line for the first line starting with
 * "total_cache", parse the number following that prefix and store it,
 * divided by 1024, in *v.  *v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	char *pos, *nl;

	*v = 0;
	for (pos = memstat; *pos != '\0'; pos = nl + 1) {
		if (startswith(pos, "total_cache")) {
			/* 11 == strlen("total_cache") */
			sscanf(pos + 11, "%lu", v);
			*v /= 1024;
			return;
		}
		nl = strchr(pos, '\n');
		if (nl == NULL)
			return;
	}
}
1703
/*
 * Look through the multi-line text str for the first line starting with
 * "<major>:<minor> <iotype>" and store the number that follows in *v.
 * *v is 0 when no such line exists.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32] = {0};
	char *pos, *nl;
	size_t keylen;

	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;
	for (pos = str; *pos != '\0'; pos = nl + 1) {
		if (startswith(pos, key)) {
			sscanf(pos + keylen, "%lu", v);
			return;
		}
		nl = strchr(pos, '\n');
		if (nl == NULL)
			return;
	}
}
1726
53b43826
SH
1727static int read_file(const char *path, char *buf, size_t size,
1728 struct file_info *d)
1729{
1730 size_t linelen = 0, total_len = 0, rv = 0;
1731 char *line = NULL;
1732 char *cache = d->buf;
1733 size_t cache_size = d->buflen;
1734 FILE *f = fopen(path, "r");
1735 if (!f)
1736 return 0;
1737
1738 while (getline(&line, &linelen, f) != -1) {
1739 size_t l = snprintf(cache, cache_size, "%s", line);
1740 if (l < 0) {
1741 perror("Error writing to cache");
1742 rv = 0;
1743 goto err;
1744 }
1745 if (l >= cache_size) {
1746 fprintf(stderr, "Internal error: truncated write to cache\n");
1747 rv = 0;
1748 goto err;
1749 }
1750 if (l < cache_size) {
1751 cache += l;
1752 cache_size -= l;
1753 total_len += l;
1754 } else {
1755 cache += cache_size;
1756 total_len += cache_size;
1757 cache_size = 0;
1758 break;
1759 }
1760 }
1761
1762 d->size = total_len;
1763 if (total_len > size ) total_len = size;
1764
1765 /* read from off 0 */
1766 memcpy(buf, d->buf, total_len);
1767 rv = total_len;
1768 err:
1769 fclose(f);
1770 free(line);
1771 return rv;
1772}
1773
758ad80c 1774/*
2ad6d2bd 1775 * FUSE ops for /proc
758ad80c 1776 */
758ad80c 1777
7bc95a75
SH
/*
 * Read memory.limit_in_bytes of cgroup in the "memory" controller.
 * Returns the parsed value, or (unsigned long)-1 when it cannot be read.
 */
static unsigned long get_memlimit(const char *cgroup)
{
	char *raw = NULL;
	unsigned long limit;

	if (cgfs_get_value("memory", cgroup, "memory.limit_in_bytes", &raw))
		limit = strtoul(raw, NULL, 10);
	else
		limit = -1;

	free(raw);
	return limit;
}
1790
/*
 * Return the smallest memory limit that applies to cgroup: its own
 * memory.limit_in_bytes and that of every ancestor up to "/".
 * Ancestors whose limit cannot be read (get_memlimit() returns -1) are
 * ignored.
 */
static unsigned long get_min_memlimit(const char *cgroup)
{
	char *copy = strdupa(cgroup);
	unsigned long memlimit = 0, retlimit;

	retlimit = get_memlimit(copy);

	/*
	 * dirname() turns a path without a leading '/' into "." and then
	 * keeps returning ".", so stop there as well; otherwise a
	 * non-absolute cgroup name would loop forever.
	 */
	while (strcmp(copy, "/") != 0 && strcmp(copy, ".") != 0) {
		copy = dirname(copy);
		memlimit = get_memlimit(copy);
		if (memlimit != (unsigned long)-1 && memlimit < retlimit)
			retlimit = memlimit;
	}

	return retlimit;
}
1807
23ce2127
SH
1808static int proc_meminfo_read(char *buf, size_t size, off_t offset,
1809 struct fuse_file_info *fi)
1810{
2dc17609 1811 struct fuse_context *fc = fuse_get_context();
97f1f27b 1812 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd 1813 char *cg;
7bc95a75 1814 char *memusage_str = NULL, *memstat_str = NULL;
2dc17609
SH
1815 unsigned long memlimit = 0, memusage = 0, cached = 0, hosttotal = 0;
1816 char *line = NULL;
e1068397 1817 size_t linelen = 0, total_len = 0, rv = 0;
97f1f27b
YY
1818 char *cache = d->buf;
1819 size_t cache_size = d->buflen;
2c51f8dd 1820 FILE *f = NULL;
2dc17609 1821
97f1f27b
YY
1822 if (offset){
1823 if (offset > d->size)
1824 return -EINVAL;
b5ad2d21
SH
1825 if (!d->cached)
1826 return 0;
97f1f27b
YY
1827 int left = d->size - offset;
1828 total_len = left > size ? size: left;
1829 memcpy(buf, cache + offset, total_len);
1830 return total_len;
1831 }
2dc17609 1832
2c51f8dd 1833 cg = get_pid_cgroup(fc->pid, "memory");
2dc17609 1834 if (!cg)
53b43826 1835 return read_file("/proc/meminfo", buf, size, d);
2dc17609 1836
7bc95a75 1837 memlimit = get_min_memlimit(cg);
35482f91 1838 if (!cgfs_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
2c51f8dd 1839 goto err;
35482f91 1840 if (!cgfs_get_value("memory", cg, "memory.stat", &memstat_str))
2c51f8dd 1841 goto err;
2dc17609
SH
1842 memusage = strtoul(memusage_str, NULL, 10);
1843 memlimit /= 1024;
1844 memusage /= 1024;
1845 get_mem_cached(memstat_str, &cached);
1846
1847 f = fopen("/proc/meminfo", "r");
1848 if (!f)
2c51f8dd 1849 goto err;
2dc17609
SH
1850
1851 while (getline(&line, &linelen, f) != -1) {
1852 size_t l;
1853 char *printme, lbuf[100];
1854
1855 memset(lbuf, 0, 100);
1856 if (startswith(line, "MemTotal:")) {
1857 sscanf(line+14, "%lu", &hosttotal);
1858 if (hosttotal < memlimit)
1859 memlimit = hosttotal;
1860 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
1861 printme = lbuf;
1862 } else if (startswith(line, "MemFree:")) {
1863 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
1864 printme = lbuf;
1865 } else if (startswith(line, "MemAvailable:")) {
1866 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
1867 printme = lbuf;
1868 } else if (startswith(line, "Buffers:")) {
1869 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
1870 printme = lbuf;
1871 } else if (startswith(line, "Cached:")) {
1872 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
1873 printme = lbuf;
1874 } else if (startswith(line, "SwapCached:")) {
1875 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
1876 printme = lbuf;
1877 } else
1878 printme = line;
97f1f27b
YY
1879
1880 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
1881 if (l < 0) {
1882 perror("Error writing to cache");
1883 rv = 0;
1884 goto err;
1885
1886 }
1887 if (l >= cache_size) {
1888 fprintf(stderr, "Internal error: truncated write to cache\n");
1889 rv = 0;
1890 goto err;
1891 }
1892
97f1f27b
YY
1893 cache += l;
1894 cache_size -= l;
2f919d9d 1895 total_len += l;
2dc17609
SH
1896 }
1897
b5ad2d21 1898 d->cached = 1;
97f1f27b
YY
1899 d->size = total_len;
1900 if (total_len > size ) total_len = size;
1901 memcpy(buf, d->buf, total_len);
1902
e1068397 1903 rv = total_len;
2c51f8dd
SH
1904err:
1905 if (f)
1906 fclose(f);
92c84dc4 1907 free(line);
2c51f8dd 1908 free(cg);
2c51f8dd
SH
1909 free(memusage_str);
1910 free(memstat_str);
e1068397 1911 return rv;
23ce2127
SH
1912}
1913
1914/*
1915 * Read the cpuset.cpus for cg
2c51f8dd 1916 * Return the answer in a newly allocated string which must be freed
23ce2127
SH
1917 */
/*
 * Fetch cpuset.cpus of cgroup cg from the "cpuset" controller.
 * Returns the string produced by cgfs_get_value() (the caller frees it),
 * or NULL when the value cannot be read.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgfs_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
1926
fa47bb52 1927bool cpu_in_cpuset(int cpu, const char *cpuset);
23ce2127 1928
aeb56147
SH
/*
 * Parse the cpu number out of a "processor : N" line and report whether
 * that cpu is part of cpuset.  Lines that do not parse yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1 &&
	       cpu_in_cpuset(cpu, cpuset);
}
1937
23ce2127
SH
1938/*
1939 * check whether this is a "^processor" line in /proc/cpuinfo
1940 */
/* Return true when line has the form "processor : N" with a decimal N. */
static bool is_processor_line(const char *line)
{
	int ignored;

	return sscanf(line, "processor : %d", &ignored) == 1;
}
1949
23ce2127
SH
1950static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
1951 struct fuse_file_info *fi)
1952{
1953 struct fuse_context *fc = fuse_get_context();
97f1f27b 1954 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
1955 char *cg;
1956 char *cpuset = NULL;
23ce2127 1957 char *line = NULL;
e1068397 1958 size_t linelen = 0, total_len = 0, rv = 0;
23ce2127
SH
1959 bool am_printing = false;
1960 int curcpu = -1;
97f1f27b
YY
1961 char *cache = d->buf;
1962 size_t cache_size = d->buflen;
2c51f8dd 1963 FILE *f = NULL;
23ce2127 1964
97f1f27b
YY
1965 if (offset){
1966 if (offset > d->size)
1967 return -EINVAL;
b5ad2d21
SH
1968 if (!d->cached)
1969 return 0;
97f1f27b
YY
1970 int left = d->size - offset;
1971 total_len = left > size ? size: left;
1972 memcpy(buf, cache + offset, total_len);
2f919d9d 1973 return total_len;
97f1f27b 1974 }
23ce2127 1975
2c51f8dd 1976 cg = get_pid_cgroup(fc->pid, "cpuset");
23ce2127 1977 if (!cg)
53b43826 1978 return read_file("proc/cpuinfo", buf, size, d);
23ce2127
SH
1979
1980 cpuset = get_cpuset(cg);
1981 if (!cpuset)
2c51f8dd 1982 goto err;
23ce2127
SH
1983
1984 f = fopen("/proc/cpuinfo", "r");
1985 if (!f)
2c51f8dd 1986 goto err;
23ce2127
SH
1987
1988 while (getline(&line, &linelen, f) != -1) {
1989 size_t l;
1990 if (is_processor_line(line)) {
aeb56147 1991 am_printing = cpuline_in_cpuset(line, cpuset);
23ce2127
SH
1992 if (am_printing) {
1993 curcpu ++;
97f1f27b 1994 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
e1068397
MM
1995 if (l < 0) {
1996 perror("Error writing to cache");
1997 rv = 0;
1998 goto err;
1999 }
2000 if (l >= cache_size) {
2001 fprintf(stderr, "Internal error: truncated write to cache\n");
2002 rv = 0;
2003 goto err;
2004 }
97f1f27b
YY
2005 if (l < cache_size){
2006 cache += l;
2007 cache_size -= l;
2008 total_len += l;
2009 }else{
2010 cache += cache_size;
2011 total_len += cache_size;
2012 cache_size = 0;
2013 break;
2014 }
23ce2127
SH
2015 }
2016 continue;
2017 }
2018 if (am_printing) {
97f1f27b 2019 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2020 if (l < 0) {
2021 perror("Error writing to cache");
2022 rv = 0;
2023 goto err;
2024 }
2025 if (l >= cache_size) {
2026 fprintf(stderr, "Internal error: truncated write to cache\n");
2027 rv = 0;
2028 goto err;
2029 }
97f1f27b
YY
2030 if (l < cache_size) {
2031 cache += l;
2032 cache_size -= l;
2033 total_len += l;
2034 } else {
2035 cache += cache_size;
2036 total_len += cache_size;
2037 cache_size = 0;
2038 break;
2039 }
23ce2127
SH
2040 }
2041 }
2042
b5ad2d21 2043 d->cached = 1;
97f1f27b
YY
2044 d->size = total_len;
2045 if (total_len > size ) total_len = size;
2046
2047 /* read from off 0 */
2048 memcpy(buf, d->buf, total_len);
e1068397 2049 rv = total_len;
2c51f8dd
SH
2050err:
2051 if (f)
2052 fclose(f);
92c84dc4 2053 free(line);
2c51f8dd
SH
2054 free(cpuset);
2055 free(cg);
e1068397 2056 return rv;
23ce2127
SH
2057}
2058
2059static int proc_stat_read(char *buf, size_t size, off_t offset,
2060 struct fuse_file_info *fi)
2061{
aeb56147 2062 struct fuse_context *fc = fuse_get_context();
97f1f27b 2063 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2064 char *cg;
2065 char *cpuset = NULL;
aeb56147 2066 char *line = NULL;
e1068397 2067 size_t linelen = 0, total_len = 0, rv = 0;
2a0fde62 2068 int curcpu = -1; /* cpu numbering starts at 0 */
97f1f27b
YY
2069 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
2070 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
2071 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
2072#define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2073 char cpuall[CPUALL_MAX_SIZE];
2074 /* reserve for cpu all */
2075 char *cache = d->buf + CPUALL_MAX_SIZE;
2076 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
2c51f8dd 2077 FILE *f = NULL;
aeb56147 2078
97f1f27b
YY
2079 if (offset){
2080 if (offset > d->size)
2081 return -EINVAL;
b5ad2d21
SH
2082 if (!d->cached)
2083 return 0;
97f1f27b
YY
2084 int left = d->size - offset;
2085 total_len = left > size ? size: left;
2086 memcpy(buf, d->buf + offset, total_len);
2f919d9d 2087 return total_len;
97f1f27b 2088 }
aeb56147 2089
2c51f8dd 2090 cg = get_pid_cgroup(fc->pid, "cpuset");
aeb56147 2091 if (!cg)
53b43826 2092 return read_file("/proc/stat", buf, size, d);
aeb56147
SH
2093
2094 cpuset = get_cpuset(cg);
2095 if (!cpuset)
2c51f8dd 2096 goto err;
aeb56147
SH
2097
2098 f = fopen("/proc/stat", "r");
2099 if (!f)
2c51f8dd 2100 goto err;
aeb56147 2101
97f1f27b
YY
2102 //skip first line
2103 if (getline(&line, &linelen, f) < 0) {
2104 fprintf(stderr, "proc_stat_read read first line failed\n");
2c51f8dd 2105 goto err;
97f1f27b
YY
2106 }
2107
aeb56147
SH
2108 while (getline(&line, &linelen, f) != -1) {
2109 size_t l;
2110 int cpu;
2a0fde62 2111 char cpu_char[10]; /* That's a lot of cores */
aeb56147
SH
2112 char *c;
2113
2a0fde62
CB
2114 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
2115 /* not a ^cpuN line containing a number N, just print it */
97f1f27b 2116 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2117 if (l < 0) {
2118 perror("Error writing to cache");
2119 rv = 0;
2120 goto err;
2121 }
2122 if (l >= cache_size) {
2123 fprintf(stderr, "Internal error: truncated write to cache\n");
2124 rv = 0;
2125 goto err;
2126 }
2127 if (l < cache_size) {
97f1f27b
YY
2128 cache += l;
2129 cache_size -= l;
2130 total_len += l;
2131 continue;
e1068397 2132 } else {
97f1f27b
YY
2133 //no more space, break it
2134 cache += cache_size;
2135 total_len += cache_size;
2136 cache_size = 0;
2137 break;
2138 }
aeb56147 2139 }
2a0fde62
CB
2140
2141 if (sscanf(cpu_char, "%d", &cpu) != 1)
2142 continue;
aeb56147
SH
2143 if (!cpu_in_cpuset(cpu, cpuset))
2144 continue;
2145 curcpu ++;
2146
2147 c = strchr(line, ' ');
2148 if (!c)
2149 continue;
25c5e8fb 2150 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
e1068397
MM
2151 if (l < 0) {
2152 perror("Error writing to cache");
2153 rv = 0;
2154 goto err;
2155
2156 }
2157 if (l >= cache_size) {
2158 fprintf(stderr, "Internal error: truncated write to cache\n");
2159 rv = 0;
2160 goto err;
2161 }
2162
97f1f27b
YY
2163 cache += l;
2164 cache_size -= l;
aeb56147 2165 total_len += l;
2f919d9d 2166
97f1f27b
YY
2167 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
2168 &softirq, &steal, &guest) != 9)
2169 continue;
2170 user_sum += user;
2171 nice_sum += nice;
2172 system_sum += system;
2173 idle_sum += idle;
2174 iowait_sum += iowait;
2175 irq_sum += irq;
2176 softirq_sum += softirq;
2177 steal_sum += steal;
2f919d9d 2178 guest_sum += guest;
97f1f27b
YY
2179 }
2180
2181 cache = d->buf;
2182
2f919d9d 2183 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
97f1f27b
YY
2184 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
2185 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
2186 memcpy(cache, cpuall, cpuall_len);
2f919d9d 2187 cache += cpuall_len;
2c51f8dd 2188 } else{
97f1f27b
YY
2189 /* shouldn't happen */
2190 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
2191 cpuall_len = 0;
2192 }
2193
2194 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
2195 total_len += cpuall_len;
b5ad2d21 2196 d->cached = 1;
97f1f27b
YY
2197 d->size = total_len;
2198 if (total_len > size ) total_len = size;
2199
2200 memcpy(buf, d->buf, total_len);
e1068397 2201 rv = total_len;
2c51f8dd
SH
2202
2203err:
2204 if (f)
2205 fclose(f);
92c84dc4 2206 free(line);
2c51f8dd
SH
2207 free(cpuset);
2208 free(cg);
e1068397 2209 return rv;
23ce2127
SH
2210}
2211
7bbf2246
SH
2212/*
2213 * How to guess what to present for uptime?
2214 * One thing we could do would be to take the date on the caller's
2215 * memory.usage_in_bytes file, which should equal the time of creation
2216 * of his cgroup. However, a task could be in a sub-cgroup of the
2217 * container. The same problem exists if we try to look at the ages
2218 * of processes in the caller's cgroup.
2219 *
2220 * So we'll fork a task that will enter the caller's pidns, mount a
2221 * fresh procfs, get the age of /proc/1, and pass that back over a pipe.
2222 *
2223 * For the second uptime #, we'll do as Stéphane had done, just copy
2224 * the number from /proc/uptime. Not sure how to best emulate 'idle'
2225 * time. Maybe someone can come up with a good algorithm and submit a
2226 * patch. Maybe something based on cpushare info?
2227 */
41bb9357
SH
2228
2229/* return age of the reaper for $pid, taken from ctime of its procdir */
/*
 * Compute the age in seconds of the reaper (pid 1) of the pid namespace
 * that pid lives in, taken from the ctime of /proc/1 on a fresh procfs.
 *
 * This is meant to run in a throw-away process (getreaperage() calls it
 * from a forked child): it unshares the mount namespace, makes "/" rslave,
 * joins pid's pid namespace and forks again.  Only the forked grandchild
 * returns a value to the caller - it acks over a pipe, remounts /proc,
 * stats /proc/1 and returns now - ctime.  The intermediate process waits
 * for it and then _exit()s without returning.  If the ack does not arrive
 * within one second the grandchild is killed and the fork is retried.
 *
 * Returns 0 on any failure that is reported by return.
 */
static long int get_pid1_time(pid_t pid)
{
	char fnam[100];
	int fd, cpipe[2], ret;
	struct stat sb;
	pid_t cpid;
	struct timeval tv;
	fd_set s;
	char v;

	if (unshare(CLONE_NEWNS))
		return 0;

	if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL)) {
		perror("rslave mount failed");
		return 0;
	}

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", pid);
	if (ret < 0 || ret >= sizeof(fnam))
		return 0;

	fd = open(fnam, O_RDONLY);
	if (fd < 0) {
		perror("get_pid1_time open of ns/pid");
		return 0;
	}
	if (setns(fd, 0)) {
		perror("get_pid1_time setns 1");
		close(fd);
		return 0;
	}
	close(fd);

	/*
	 * We are in a forked helper: leave with _exit() like the other exit
	 * paths, so that no atexit handlers run and no stdio buffers
	 * inherited from the parent get flushed a second time.
	 */
	if (pipe(cpipe) < 0)
		_exit(1);

loop:
	cpid = fork();
	if (cpid < 0)
		return 0;

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		/* Replace /proc with one that reflects the pid namespace joined above. */
		umount2("/proc", MNT_DETACH);
		if (mount("proc", "/proc", "proc", 0, NULL)) {
			perror("get_pid1_time mount");
			return 0;
		}
		ret = lstat("/proc/1", &sb);
		if (ret) {
			perror("get_pid1_time lstat");
			return 0;
		}
		return time(NULL) - sb.st_ctime;
	}

	// give the child 1 second to be done forking and
	// write its ack
	FD_ZERO(&s);
	FD_SET(cpipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(cpipe[0]+1, &s, NULL, NULL, &tv);
	if (ret <= 0)
		goto again;
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1') {
		goto again;
	}

	wait_for_pid(cpid);
	_exit(0);

again:
	kill(cpid, SIGKILL);
	wait_for_pid(cpid);
	goto loop;
}
2315
/*
 * Return the age in seconds of the reaper of qpid's pid namespace.
 *
 * The work is done by get_pid1_time() in a forked child, which sends the
 * result back over a pipe.  The parent waits up to one second for it.
 *
 * Returns 0 when the pipe or fork fails, on timeout, or when the result
 * cannot be read.
 */
static long int getreaperage(pid_t qpid)
{
	int pid, mypipe[2], ret;
	struct timeval tv;
	fd_set s;
	long int mtime, answer = 0;

	if (pipe(mypipe)) {
		return 0;
	}

	pid = fork();
	if (pid < 0) {
		/* No child exists, so there is nothing to read or to wait for. */
		perror("fork");
		close(mypipe[0]);
		close(mypipe[1]);
		return 0;
	}

	if (!pid) { // child
		mtime = get_pid1_time(qpid);
		if (write(mypipe[1], &mtime, sizeof(mtime)) != sizeof(mtime))
			fprintf(stderr, "Warning: bad write from getreaperage\n");
		_exit(0);
	}

	close(mypipe[1]);
	FD_ZERO(&s);
	FD_SET(mypipe[0], &s);
	tv.tv_sec = 1;
	tv.tv_usec = 0;
	ret = select(mypipe[0]+1, &s, NULL, NULL, &tv);
	if (ret < 0) {
		perror("select");
		goto out;
	}
	if (!ret) {
		/* select() returns 0 on timeout; errno is not set in that case. */
		fprintf(stderr, "timed out\n");
		goto out;
	}
	if (read(mypipe[0], &mtime, sizeof(mtime)) != sizeof(mtime)) {
		perror("read");
		goto out;
	}
	answer = mtime;

out:
	wait_for_pid(pid);
	close(mypipe[0]);
	return answer;
}
2361
/*
 * Read the whole-second part of the second field of /proc/uptime.
 *
 * Returns 0 when the file cannot be opened or does not contain the four
 * expected numeric components.
 */
static unsigned long int getprocidle(void)
{
	unsigned long int up_sec, up_frac, idle_sec, idle_frac;
	int nfields;
	FILE *uptime = fopen("/proc/uptime", "r");

	if (uptime == NULL)
		return 0;

	nfields = fscanf(uptime, "%lu.%02lu %lu.%02lu",
			&up_sec, &up_frac, &idle_sec, &idle_frac);
	fclose(uptime);

	/* only the integral idle seconds are of interest to the caller */
	return nfields == 4 ? idle_sec : 0;
}
2377
2378/*
2379 * We read /proc/uptime and reuse its second field.
2380 * For the first field, we use the mtime for the reaper for
2381 * the calling pid as returned by getreaperage
2382 */
23ce2127
SH
2383static int proc_uptime_read(char *buf, size_t size, off_t offset,
2384 struct fuse_file_info *fi)
2385{
41bb9357 2386 struct fuse_context *fc = fuse_get_context();
97f1f27b 2387 struct file_info *d = (struct file_info *)fi->fh;
41bb9357 2388 long int reaperage = getreaperage(fc->pid);;
38056ebc 2389 unsigned long int idletime = getprocidle();
b5ad2d21 2390 char *cache = d->buf;
97f1f27b 2391 size_t total_len = 0;
41bb9357 2392
97f1f27b
YY
2393 if (offset){
2394 if (offset > d->size)
2395 return -EINVAL;
b5ad2d21
SH
2396 if (!d->cached)
2397 return 0;
2398 int left = d->size - offset;
2399 total_len = left > size ? size: left;
2400 memcpy(buf, cache + offset, total_len);
2401 return total_len;
97f1f27b
YY
2402 }
2403
f6c0b279
SH
2404 if (idletime > reaperage)
2405 idletime = reaperage;
2406
b5ad2d21 2407 total_len = snprintf(d->buf, d->size, "%ld.0 %lu.0\n", reaperage, idletime);
e1068397
MM
2408 if (total_len < 0){
2409 perror("Error writing to cache");
2410 return 0;
2411 }
cdcdb29b 2412
b5ad2d21
SH
2413 d->size = (int)total_len;
2414 d->cached = 1;
2415
2416 if (total_len > size) total_len = size;
2417
2418 memcpy(buf, d->buf, total_len);
97f1f27b 2419 return total_len;
23ce2127
SH
2420}
2421
49878439
YY
2422static int proc_diskstats_read(char *buf, size_t size, off_t offset,
2423 struct fuse_file_info *fi)
2424{
2425 char dev_name[72];
2426 struct fuse_context *fc = fuse_get_context();
97f1f27b 2427 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2428 char *cg;
2429 char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
49878439
YY
2430 *io_wait_time_str = NULL, *io_service_time_str = NULL;
2431 unsigned long read = 0, write = 0;
2432 unsigned long read_merged = 0, write_merged = 0;
2433 unsigned long read_sectors = 0, write_sectors = 0;
2434 unsigned long read_ticks = 0, write_ticks = 0;
2435 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
2436 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
b5ad2d21
SH
2437 char *cache = d->buf;
2438 size_t cache_size = d->buflen;
49878439 2439 char *line = NULL;
e1068397 2440 size_t linelen = 0, total_len = 0, rv = 0;
49878439
YY
2441 unsigned int major = 0, minor = 0;
2442 int i = 0;
2c51f8dd 2443 FILE *f = NULL;
49878439 2444
97f1f27b
YY
2445 if (offset){
2446 if (offset > d->size)
2447 return -EINVAL;
b5ad2d21
SH
2448 if (!d->cached)
2449 return 0;
2450 int left = d->size - offset;
2451 total_len = left > size ? size: left;
2452 memcpy(buf, cache + offset, total_len);
2453 return total_len;
97f1f27b 2454 }
49878439 2455
2c51f8dd 2456 cg = get_pid_cgroup(fc->pid, "blkio");
49878439 2457 if (!cg)
53b43826 2458 return read_file("/proc/diskstats", buf, size, d);
49878439 2459
35482f91 2460 if (!cgfs_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
2c51f8dd 2461 goto err;
35482f91 2462 if (!cgfs_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
2c51f8dd 2463 goto err;
35482f91 2464 if (!cgfs_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
2c51f8dd 2465 goto err;
35482f91 2466 if (!cgfs_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
2c51f8dd 2467 goto err;
35482f91 2468 if (!cgfs_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
2c51f8dd 2469 goto err;
49878439
YY
2470
2471
2472 f = fopen("/proc/diskstats", "r");
2473 if (!f)
2c51f8dd 2474 goto err;
49878439
YY
2475
2476 while (getline(&line, &linelen, f) != -1) {
2477 size_t l;
2478 char *printme, lbuf[256];
2479
c0adec85 2480 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
49878439
YY
2481 if(i == 3){
2482 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
2483 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
2484 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
2485 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
2486 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
2487 read_sectors = read_sectors/512;
2488 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
2489 write_sectors = write_sectors/512;
2f919d9d 2490
49878439
YY
2491 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
2492 rd_svctm = rd_svctm/1000000;
2493 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
2494 rd_wait = rd_wait/1000000;
2495 read_ticks = rd_svctm + rd_wait;
2496
2497 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
2498 wr_svctm = wr_svctm/1000000;
2499 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
2500 wr_wait = wr_wait/1000000;
2501 write_ticks = wr_svctm + wr_wait;
2502
2503 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
2504 tot_ticks = tot_ticks/1000000;
2505 }else{
2506 continue;
2507 }
2508
2509 memset(lbuf, 0, 256);
2510 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
2f919d9d 2511 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
49878439
YY
2512 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
2513 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
2514 printme = lbuf;
2515 } else
2516 continue;
2517
b5ad2d21 2518 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
2519 if (l < 0) {
2520 perror("Error writing to fuse buf");
2521 rv = 0;
2522 goto err;
2523 }
b5ad2d21 2524 if (l >= cache_size) {
e1068397
MM
2525 fprintf(stderr, "Internal error: truncated write to cache\n");
2526 rv = 0;
2527 goto err;
2528 }
b5ad2d21
SH
2529 cache += l;
2530 cache_size -= l;
49878439
YY
2531 total_len += l;
2532 }
2533
b5ad2d21 2534 d->cached = 1;
97f1f27b 2535 d->size = total_len;
b5ad2d21
SH
2536 if (total_len > size ) total_len = size;
2537 memcpy(buf, d->buf, total_len);
2538
e1068397 2539 rv = total_len;
2c51f8dd
SH
2540err:
2541 free(cg);
2542 if (f)
2543 fclose(f);
49878439 2544 free(line);
2c51f8dd
SH
2545 free(io_serviced_str);
2546 free(io_merged_str);
2547 free(io_service_bytes_str);
2548 free(io_wait_time_str);
2549 free(io_service_time_str);
e1068397 2550 return rv;
49878439
YY
2551}
2552
23ce2127
SH
/*
 * Measure how many bytes the file named by 'which' currently yields by
 * reading it line by line to the end.
 *
 * Returns the summed line lengths, or 0 if the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char *linebuf = NULL;
	size_t cap = 0;
	ssize_t nread, total = 0;
	FILE *fp = fopen(which, "r");

	if (fp == NULL)
		return 0;

	for (;;) {
		nread = getline(&linebuf, &cap, fp);
		if (nread == -1)
			break;
		total += nread;
	}

	fclose(fp);
	free(linebuf);

	return total;
}
2569
758ad80c
SH
/*
 * Fill in *sb for an entry below /proc.
 *
 * "/proc" itself is reported as a read/execute-only directory; the five
 * virtualized files are reported as read-only regular files of size 0.
 * All timestamps are set to the current time and ownership to 0:0.
 *
 * Returns 0 on success, -EINVAL if the clock cannot be read, and -ENOENT
 * for any other path.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const regular_files[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	struct timespec now;
	size_t i;

	memset(sb, 0, sizeof(struct stat));
	if (clock_gettime(CLOCK_REALTIME, &now) < 0)
		return -EINVAL;

	sb->st_uid = sb->st_gid = 0;
	sb->st_atim = sb->st_mtim = sb->st_ctim = now;

	if (strcmp(path, "/proc") == 0) {
		sb->st_mode = S_IFDIR | 00555;
		sb->st_nlink = 2;
		return 0;
	}

	for (i = 0; i < sizeof(regular_files) / sizeof(regular_files[0]); i++) {
		if (strcmp(path, regular_files[i]) != 0)
			continue;
		sb->st_size = 0;
		sb->st_mode = S_IFREG | 00444;
		sb->st_nlink = 1;
		return 0;
	}

	return -ENOENT;
}
2597
2598static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2599 struct fuse_file_info *fi)
2600{
2601 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2602 filler(buf, "meminfo", NULL, 0) != 0 ||
2603 filler(buf, "stat", NULL, 0) != 0 ||
49878439
YY
2604 filler(buf, "uptime", NULL, 0) != 0 ||
2605 filler(buf, "diskstats", NULL, 0) != 0)
758ad80c 2606 return -EINVAL;
758ad80c
SH
2607 return 0;
2608}
2609
35629743
SH
2610static int proc_open(const char *path, struct fuse_file_info *fi)
2611{
96fc5ee6
SH
2612 int type = -1;
2613 struct file_info *info;
2614
2615 if (strcmp(path, "/proc/meminfo") == 0)
2616 type = LXC_TYPE_PROC_MEMINFO;
2617 else if (strcmp(path, "/proc/cpuinfo") == 0)
2618 type = LXC_TYPE_PROC_CPUINFO;
2619 else if (strcmp(path, "/proc/uptime") == 0)
2620 type = LXC_TYPE_PROC_UPTIME;
2621 else if (strcmp(path, "/proc/stat") == 0)
2622 type = LXC_TYPE_PROC_STAT;
2623 else if (strcmp(path, "/proc/diskstats") == 0)
2624 type = LXC_TYPE_PROC_DISKSTATS;
2625 if (type == -1)
2626 return -ENOENT;
2627
2c51f8dd
SH
2628 info = malloc(sizeof(*info));
2629 if (!info)
2630 return -ENOMEM;
2631
96fc5ee6
SH
2632 memset(info, 0, sizeof(*info));
2633 info->type = type;
2634
97f1f27b 2635 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
2c51f8dd
SH
2636 do {
2637 info->buf = malloc(info->buflen);
2638 } while (!info->buf);
97f1f27b
YY
2639 memset(info->buf, 0, info->buflen);
2640 /* set actual size to buffer size */
2f919d9d 2641 info->size = info->buflen;
97f1f27b 2642
96fc5ee6
SH
2643 fi->fh = (unsigned long)info;
2644 return 0;
2645}
2646
2647static int proc_release(const char *path, struct fuse_file_info *fi)
2648{
2649 struct file_info *f = (struct file_info *)fi->fh;
2650
2651 do_release_file_info(f);
2652 return 0;
35629743
SH
2653}
2654
35629743
SH
2655static int proc_read(const char *path, char *buf, size_t size, off_t offset,
2656 struct fuse_file_info *fi)
2657{
96fc5ee6
SH
2658 struct file_info *f = (struct file_info *) fi->fh;
2659
2660 switch (f->type) {
2f919d9d 2661 case LXC_TYPE_PROC_MEMINFO:
23ce2127 2662 return proc_meminfo_read(buf, size, offset, fi);
96fc5ee6 2663 case LXC_TYPE_PROC_CPUINFO:
23ce2127 2664 return proc_cpuinfo_read(buf, size, offset, fi);
96fc5ee6 2665 case LXC_TYPE_PROC_UPTIME:
23ce2127 2666 return proc_uptime_read(buf, size, offset, fi);
96fc5ee6 2667 case LXC_TYPE_PROC_STAT:
23ce2127 2668 return proc_stat_read(buf, size, offset, fi);
96fc5ee6 2669 case LXC_TYPE_PROC_DISKSTATS:
49878439 2670 return proc_diskstats_read(buf, size, offset, fi);
96fc5ee6
SH
2671 default:
2672 return -EINVAL;
2673 }
35629743
SH
2674}
2675
2ad6d2bd
SH
2676/*
2677 * FUSE ops for /
2678 * these just delegate to the /proc and /cgroup ops as
2679 * needed
2680 */
758ad80c
SH
2681
2682static int lxcfs_getattr(const char *path, struct stat *sb)
2683{
2684 if (strcmp(path, "/") == 0) {
2685 sb->st_mode = S_IFDIR | 00755;
2686 sb->st_nlink = 2;
2687 return 0;
2688 }
2689 if (strncmp(path, "/cgroup", 7) == 0) {
2690 return cg_getattr(path, sb);
2691 }
35629743 2692 if (strncmp(path, "/proc", 5) == 0) {
758ad80c
SH
2693 return proc_getattr(path, sb);
2694 }
2695 return -EINVAL;
2696}
2697
/*
 * FUSE opendir: "/" and "/proc" need no per-open state; anything starting
 * with "/cgroup" is delegated to cg_opendir(). Other paths yield -ENOENT.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_opendir(path, fi);

	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	return -ENOENT;
}
2710
2711static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2712 struct fuse_file_info *fi)
2713{
2714 if (strcmp(path, "/") == 0) {
2715 if (filler(buf, "proc", NULL, 0) != 0 ||
2716 filler(buf, "cgroup", NULL, 0) != 0)
2717 return -EINVAL;
2718 return 0;
2719 }
35629743 2720 if (strncmp(path, "/cgroup", 7) == 0)
758ad80c 2721 return cg_readdir(path, buf, filler, offset, fi);
35629743
SH
2722 if (strcmp(path, "/proc") == 0)
2723 return proc_readdir(path, buf, filler, offset, fi);
758ad80c
SH
2724 return -EINVAL;
2725}
2726
/*
 * FUSE releasedir: nothing to release for "/" and "/proc"; "/cgroup..."
 * is delegated to cg_releasedir(). Other paths yield -EINVAL.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_releasedir(path, fi);

	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	return -EINVAL;
}
2738
99978832
SH
/*
 * FUSE open: route by path prefix to cg_open() or proc_open();
 * -EINVAL for anything else.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	const bool is_cgroup = strncmp(path, "/cgroup", 7) == 0;
	const bool is_proc = strncmp(path, "/proc", 5) == 0;

	if (is_cgroup)
		return cg_open(path, fi);
	if (is_proc)
		return proc_open(path, fi);

	return -EINVAL;
}
2748
/*
 * FUSE read: route by path prefix to cg_read() or proc_read();
 * -EINVAL for anything else.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	const bool is_cgroup = strncmp(path, "/cgroup", 7) == 0;
	const bool is_proc = strncmp(path, "/proc", 5) == 0;

	if (is_cgroup)
		return cg_read(path, buf, size, offset, fi);
	if (is_proc)
		return proc_read(path, buf, size, offset, fi);

	return -EINVAL;
}
2759
2ad6d2bd
SH
/*
 * FUSE write: only paths starting with "/cgroup" are writable and are
 * delegated to cg_write(); everything else is rejected with -EINVAL.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
	     struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
2769
99978832
SH
/* FUSE flush: nothing is buffered per handle, so this always succeeds. */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
2774
/*
 * FUSE release: route by path prefix to cg_release() or proc_release();
 * -EINVAL for anything else.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	const bool is_cgroup = strncmp(path, "/cgroup", 7) == 0;
	const bool is_proc = strncmp(path, "/proc", 5) == 0;

	if (is_cgroup)
		return cg_release(path, fi);
	if (is_proc)
		return proc_release(path, fi);

	return -EINVAL;
}
2784
/* FUSE fsync: there is no backing store to sync, so this always succeeds. */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
2789
ab54b798
SH
/*
 * FUSE mkdir: only supported below "/cgroup", where it is delegated to
 * cg_mkdir(); -EINVAL elsewhere.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
2797
341b21ad
SH
/*
 * FUSE chown: only supported below "/cgroup", where it is delegated to
 * cg_chown(); -EINVAL elsewhere.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
2805
2ad6d2bd
SH
/*
 * FUSE truncate.
 *
 * cat issues a truncate before it calls ops->write, which has no sensible
 * meaning for cgroup files. For paths starting with "/cgroup" we therefore
 * report success without doing anything; every other path gets -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	(void)newsize;

	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
2817
50d8d5b5
SH
/*
 * FUSE rmdir: only supported below "/cgroup", where it is delegated to
 * cg_rmdir(); -EINVAL elsewhere.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
2824
fd2e4e03
SH
/*
 * FUSE chmod: only supported below "/cgroup", where it is delegated to
 * cg_chmod(); -EINVAL elsewhere.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
2831
758ad80c
SH
2832const struct fuse_operations lxcfs_ops = {
2833 .getattr = lxcfs_getattr,
2834 .readlink = NULL,
2835 .getdir = NULL,
2836 .mknod = NULL,
ab54b798 2837 .mkdir = lxcfs_mkdir,
758ad80c 2838 .unlink = NULL,
50d8d5b5 2839 .rmdir = lxcfs_rmdir,
758ad80c
SH
2840 .symlink = NULL,
2841 .rename = NULL,
2842 .link = NULL,
fd2e4e03 2843 .chmod = lxcfs_chmod,
341b21ad 2844 .chown = lxcfs_chown,
2ad6d2bd 2845 .truncate = lxcfs_truncate,
758ad80c 2846 .utime = NULL,
99978832
SH
2847
2848 .open = lxcfs_open,
2849 .read = lxcfs_read,
2850 .release = lxcfs_release,
2ad6d2bd 2851 .write = lxcfs_write,
99978832 2852
758ad80c 2853 .statfs = NULL,
99978832
SH
2854 .flush = lxcfs_flush,
2855 .fsync = lxcfs_fsync,
758ad80c
SH
2856
2857 .setxattr = NULL,
2858 .getxattr = NULL,
2859 .listxattr = NULL,
2860 .removexattr = NULL,
2861
2862 .opendir = lxcfs_opendir,
2863 .readdir = lxcfs_readdir,
2864 .releasedir = lxcfs_releasedir,
2865
2866 .fsyncdir = NULL,
2867 .init = NULL,
2868 .destroy = NULL,
2869 .access = NULL,
2870 .create = NULL,
2871 .ftruncate = NULL,
2872 .fgetattr = NULL,
2873};
2874
/*
 * Print the command-line synopsis to stderr and terminate the process
 * with exit status 1. 'me' is the program name shown in the synopsis.
 */
static void usage(const char *me)
{
	fprintf(stderr,
		"Usage:\n"
		"\n"
		"%s mountpoint\n"
		"%s -h\n",
		me, me);
	exit(1);
}
2883
/*
 * Report whether the argument 'w' is one of the accepted spellings of the
 * help request: "-h", "--help", "-help" or "help".
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = { "-h", "--help", "-help", "help" };
	size_t i;

	for (i = 0; i < sizeof(spellings) / sizeof(spellings[0]); i++) {
		if (strcmp(w, spellings[i]) == 0)
			return true;
	}

	return false;
}
2893
0b0f73db
SH
/*
 * Remove the first occurrence of the flag 'which' from argv (searching
 * from argv[1]) and decrement *argcp. The remaining arguments, including
 * the terminating NULL, are shifted down by one slot. Nothing changes when
 * the flag is absent.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	char **slot = argv + 1;

	/* locate the first matching argument, if any */
	while (*slot && strcmp(*slot, which) != 0)
		slot++;
	if (!*slot)
		return;

	/* close the gap; the final copy moves the NULL terminator down */
	for (; *slot; slot++)
		*slot = slot[1];

	(*argcp)--;
}
2908
/*
 * Remove the first "opt value" pair from argv (searching from argv[1]) and
 * decrease *argcp by two. The remaining arguments, including the
 * terminating NULL, are shifted down by two slots.
 *
 * The value following 'opt' must equal 'v'; if it does not, the offending
 * value is reported on stderr and the process exits with status 1.
 * Nothing changes when 'opt' is absent or is the last argument.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	/* an option needs a following value, so stop one short of the end */
	for (i = 1; argv[i] && argv[i+1]; i++) {
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i+1], v) != 0) {
			/* report what was actually passed, not what we expected */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i+1]);
			exit(1);
		}
		/* argv[i+1] != NULL guarantees argv[i+2] is still within argv */
		for (; argv[i+1]; i++) {
			argv[i] = argv[i+2];
		}
		(*argcp) -= 2;
		return;
	}
}
2929
758ad80c
SH
2930int main(int argc, char *argv[])
2931{
c0adec85 2932 int ret = -1;
0b0f73db
SH
2933 /*
2934 * what we pass to fuse_main is:
2935 * argv[0] -s -f -o allow_other,directio argv[1] NULL
2936 */
2c51f8dd
SH
2937 int nargs = 5, cnt = 0;
2938 char *newargv[6];
758ad80c 2939
977ac879 2940#ifdef FORTRAVIS
df062bcb
SH
2941 /* for travis which runs on 12.04 */
2942 if (glib_check_version (2, 36, 0) != NULL)
2943 g_type_init ();
977ac879 2944#endif
df062bcb 2945
0b0f73db
SH
2946 /* accomodate older init scripts */
2947 swallow_arg(&argc, argv, "-s");
2948 swallow_arg(&argc, argv, "-f");
2949 swallow_option(&argc, argv, "-o", "allow_other");
2950
2e9c0b32
SH
2951 if (argc == 2 && strcmp(argv[1], "--version") == 0) {
2952 fprintf(stderr, "%s\n", VERSION);
2953 exit(0);
2954 }
0b0f73db 2955 if (argc != 2 || is_help(argv[1]))
758ad80c
SH
2956 usage(argv[0]);
2957
38a76a91 2958 newargv[cnt++] = argv[0];
38a76a91
SH
2959 newargv[cnt++] = "-f";
2960 newargv[cnt++] = "-o";
2961 newargv[cnt++] = "allow_other,direct_io";
2962 newargv[cnt++] = argv[1];
2963 newargv[cnt++] = NULL;
758ad80c 2964
35482f91 2965 if (!cgfs_setup_controllers())
c0adec85 2966 goto out;
758ad80c 2967
35482f91 2968 ret = fuse_main(nargs, newargv, &lxcfs_ops, NULL);
758ad80c 2969
c0adec85 2970out:
758ad80c 2971 return ret;
2183082c 2972}