]> git.proxmox.com Git - mirror_lxcfs.git/blame - lxcfs.c
prune unused init pid store entries
[mirror_lxcfs.git] / lxcfs.c
CommitLineData
758ad80c
SH
1/* lxcfs
2 *
b11c6ec0 3 * Copyright © 2014-2016 Canonical, Inc
758ad80c
SH
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
f2799430 6 * See COPYING file for details.
758ad80c
SH
7 */
8
758ad80c
SH
9#define FUSE_USE_VERSION 26
10
2183082c 11#include <stdio.h>
758ad80c
SH
12#include <dirent.h>
13#include <fcntl.h>
14#include <fuse.h>
15#include <unistd.h>
16#include <errno.h>
17#include <stdbool.h>
18#include <time.h>
19#include <string.h>
20#include <stdlib.h>
21#include <libgen.h>
41bb9357 22#include <sched.h>
b11c6ec0 23#include <pthread.h>
41bb9357 24#include <linux/sched.h>
a05660a6 25#include <sys/socket.h>
41bb9357 26#include <sys/mount.h>
5b2dfd85 27#include <sys/epoll.h>
41bb9357 28#include <wait.h>
758ad80c 29
977ac879 30#ifdef FORTRAVIS
df062bcb
SH
31#define GLIB_DISABLE_DEPRECATION_WARNINGS
32#include <glib-object.h>
977ac879 33#endif
df062bcb 34
35482f91 35#include "cgfs.h"
2e9c0b32 36#include "config.h" // for VERSION
758ad80c 37
443d13f5
SH
/*
 * Tag stored in file_info.type, identifying what kind of object an open
 * handle refers to.
 */
enum {
	LXC_TYPE_CGDIR = 0,		/* set by cg_opendir() */
	LXC_TYPE_CGFILE = 1,		/* set by cg_open() */
	LXC_TYPE_PROC_MEMINFO = 2,
	LXC_TYPE_PROC_CPUINFO = 3,
	LXC_TYPE_PROC_UPTIME = 4,
	LXC_TYPE_PROC_STAT = 5,
	LXC_TYPE_PROC_DISKSTATS = 6,
};
47
c688e1b3
SH
/*
 * Per-open-handle state, stashed in fuse_file_info.fh by the open/opendir
 * handlers and torn down by do_release_file_info(), which free()s the
 * controller, cgroup, file and buf members.
 */
struct file_info {
	char *controller;	/* heap copy of the controller name, or NULL */
	char *cgroup;		/* heap copy of the cgroup path, or NULL */
	char *file;		/* heap copy of the file name, NULL for directories */
	int type;		/* one of the LXC_TYPE_* values */
	char *buf;		/* unused as of yet */
	int buflen;
	int size;		/* actual data size */
	int cached;
};
58
97f1f27b
YY
59/* reserve buffer size, for cpuall in /proc/stat */
60#define BUF_RESERVE_SIZE 256
61
/*
 * A table caching which pid is init for a pid namespace.
 * When looking up which pid is init for $qpid, we first
 * 1. Stat /proc/$qpid/ns/pid.
 * 2. Check whether the ino_t is in our store.
 *   a. if not, fork a child in qpid's ns to send us
 *      ucred.pid = 1, and read the initpid.  Cache
 *      initpid and creation time for /proc/initpid
 *      in a new store entry.
 *   b. if so, verify that /proc/initpid still matches
 *      what we have saved.  If not, clear the store
 *      entry and go back to a.  If so, return the
 *      cached initpid.
 */
/* TODO - turn this into a hashtable */
/* TODO - periodically purge the hashtable? */
struct pidns_init_store {
	ino_t ino;		/* inode number for /proc/$pid/ns/pid */
	pid_t initpid;		/* the pid of init in that ns */
	long int ctime;		/* the time at which /proc/$initpid was created */
	struct pidns_init_store *next;	/* next entry in the singly linked list */
	long int lastcheck;	/* time(NULL) of the last save or successful lookup */
};

/* Head of the store list; accessed under pidns_store_mutex. */
struct pidns_init_store *pidns_inits;
static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Lock @l.  Any failure from pthread_mutex_lock() is treated as fatal:
 * the error is printed to stderr and the process exits with status 1.
 */
static void lock_mutex(pthread_mutex_t *l)
{
	int err = pthread_mutex_lock(l);

	if (err == 0)
		return;

	fprintf(stderr, "pthread_mutex_lock returned:%d %s\n", err, strerror(err));
	exit(1);
}
2c51f8dd 97
b11c6ec0
SH
/*
 * Unlock @l.  Any failure from pthread_mutex_unlock() is treated as fatal:
 * the error is printed to stderr and the process exits with status 1.
 */
static void unlock_mutex(pthread_mutex_t *l)
{
	int err = pthread_mutex_unlock(l);

	if (err == 0)
		return;

	fprintf(stderr, "pthread_mutex_unlock returned:%d %s\n", err, strerror(err));
	exit(1);
}
107
b11c6ec0
SH
108static void store_lock(void)
109{
110 lock_mutex(&pidns_store_mutex);
111}
112
113static void store_unlock(void)
114{
115 unlock_mutex(&pidns_store_mutex);
116}
117
118/* Must be called under store_lock */
119static bool initpid_still_valid(struct pidns_init_store *e, struct stat *nsfdsb)
120{
121 struct stat initsb;
122 char fnam[100];
123
124 snprintf(fnam, 100, "/proc/%d", e->initpid);
125 if (stat(fnam, &initsb) < 0)
126 return false;
127#if DEBUG
128 fprintf(stderr, "comparing ctime %ld %ld for pid %d\n",
129 e->ctime, initsb.st_ctime, e->initpid);
130#endif
131 if (e->ctime != initsb.st_ctime)
132 return false;
133 return true;
134}
135
136/* Must be called under store_lock */
137static void remove_initpid(struct pidns_init_store *e)
138{
139 struct pidns_init_store *tmp;
140
141#if DEBUG
142 fprintf(stderr, "remove_initpid: removing entry for %d\n", e->initpid);
143#endif
144 if (pidns_inits == e) {
145 pidns_inits = e->next;
146 free(e);
147 return;
148 }
149
150 tmp = pidns_inits;
151 while (tmp) {
152 if (tmp->next == e) {
153 tmp->next = e->next;
154 free(e);
155 return;
156 }
157 tmp = tmp->next;
158 }
159}
160
8e547050
SH
161#define PURGE_SECS 5
162/* Must be called under store_lock */
163static void prune_initpid_store(void)
164{
165 static long int last_prune = 0;
166 struct pidns_init_store *e, *prev, *delme;
167 long int now, threshold;
168
169 if (!last_prune) {
170 last_prune = time(NULL);
171 return;
172 }
173 now = time(NULL);
174 if (now < last_prune + PURGE_SECS)
175 return;
176#if DEBUG
177 fprintf(stderr, "pruning\n");
178#endif
179 last_prune = now;
180 threshold = now - 2 * PURGE_SECS;
181
182 for (prev = NULL, e = pidns_inits; e; ) {
183 if (e->lastcheck < threshold) {
184#if DEBUG
185 fprintf(stderr, "Removing cached entry for %d\n", e->initpid);
186#endif
187 delme = e;
188 if (prev)
189 prev->next = e->next;
190 else
191 pidns_inits = e->next;
192 e = e->next;
193 free(delme);
194 } else {
195 prev = e;
196 e = e->next;
197 }
198 }
199}
200
b11c6ec0
SH
201/* Must be called under store_lock */
202static void save_initpid(struct stat *sb, pid_t pid)
203{
204 struct pidns_init_store *e;
205 char fpath[100];
206 struct stat procsb;
207
208#if DEBUG
209 fprintf(stderr, "save_initpid: adding entry for %d\n", pid);
210#endif
211 snprintf(fpath, 100, "/proc/%d", pid);
212 if (stat(fpath, &procsb) < 0)
213 return;
214 do {
215 e = malloc(sizeof(*e));
216 } while (!e);
217 e->ino = sb->st_ino;
218 e->initpid = pid;
219 e->ctime = procsb.st_ctime;
220 e->next = pidns_inits;
8e547050 221 e->lastcheck = time(NULL);
b11c6ec0
SH
222 pidns_inits = e;
223}
224
225/*
226 * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
227 * entry for the inode number and creation time. Verify that the init pid
228 * is still valid. If not, remove it. Return the entry if valid, NULL
229 * otherwise.
230 * Must be called under store_lock
231 */
232static struct pidns_init_store *lookup_verify_initpid(struct stat *sb)
233{
234 struct pidns_init_store *e = pidns_inits;
235 while (e) {
236 if (e->ino == sb->st_ino) {
8e547050
SH
237 if (initpid_still_valid(e, sb)) {
238 e->lastcheck = time(NULL);
b11c6ec0 239 return e;
8e547050 240 }
b11c6ec0
SH
241 remove_initpid(e);
242 return NULL;
243 }
244 e = e->next;
245 }
246
247 return NULL;
248}
249
250#define SEND_CREDS_OK 0
251#define SEND_CREDS_NOTSK 1
252#define SEND_CREDS_FAIL 2
253static bool recv_creds(int sock, struct ucred *cred, char *v);
254static int wait_for_pid(pid_t pid);
255static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst);
256
257/*
258 * fork a task which switches to @task's namespace and writes '1'.
259 * over a unix sock so we can read the task's reaper's pid in our
260 * namespace
261 */
262static void write_task_init_pid_exit(int sock, pid_t target)
263{
264 struct ucred cred;
265 char fnam[100];
266 pid_t pid;
267 char v;
268 int fd, ret;
269
270 ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", (int)target);
271 if (ret < 0 || ret >= sizeof(fnam))
272 _exit(1);
273
274 fd = open(fnam, O_RDONLY);
275 if (fd < 0) {
276 perror("write_task_init_pid_exit open of ns/pid");
277 _exit(1);
278 }
279 if (setns(fd, 0)) {
280 perror("write_task_init_pid_exit setns 1");
281 close(fd);
282 _exit(1);
283 }
284 pid = fork();
285 if (pid < 0)
286 _exit(1);
287 if (pid != 0) {
288 if (!wait_for_pid(pid))
289 _exit(1);
290 _exit(0);
291 }
292
293 /* we are the child */
294 cred.uid = 0;
295 cred.gid = 0;
296 cred.pid = 1;
297 v = '1';
298 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK)
299 _exit(1);
300 _exit(0);
301}
302
/*
 * Ask a forked helper (write_task_init_pid_exit) to report, over a
 * socketpair, the pid of the init process of @task's pid namespace.
 * Returns the pid taken from the received credentials, or -1 if the
 * socketpair, the fork or the receive fails.
 */
static pid_t get_init_pid_for_task(pid_t task)
{
	struct ucred cred;
	int sock[2];
	pid_t child;
	pid_t initpid = -1;
	char v = '0';

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		perror("socketpair");
		return -1;
	}

	child = fork();
	if (child == 0) {
		/* helper side: uses sock[0] only and never returns */
		close(sock[1]);
		write_task_init_pid_exit(sock[0], task);
		_exit(0);
	}

	if (child > 0 && recv_creds(sock[1], &cred, &v))
		initpid = cred.pid;

	close(sock[0]);
	close(sock[1]);
	if (child > 0)
		wait_for_pid(child);
	return initpid;
}
336
337static pid_t lookup_initpid_in_store(pid_t qpid)
338{
339 pid_t answer = 0;
340 struct stat sb;
341 struct pidns_init_store *e;
342 char fnam[100];
343
344 snprintf(fnam, 100, "/proc/%d/ns/pid", qpid);
345 store_lock();
346 if (stat(fnam, &sb) < 0)
347 goto out;
348 e = lookup_verify_initpid(&sb);
349 if (e) {
350 answer = e->initpid;
351 goto out;
352 }
353 answer = get_init_pid_for_task(qpid);
354 if (answer > 0)
355 save_initpid(&sb, answer);
356
357out:
8e547050
SH
358 /* we prune at end in case we are returning
359 * the value we were about to return */
360 prune_initpid_store();
b11c6ec0
SH
361 store_unlock();
362 return answer;
363}
0afd85bd 364
a05660a6
SH
/*
 * Reap child @pid, retrying when waitpid() is interrupted by a signal.
 * Returns 0 only if the child exited normally with status 0; returns -1
 * for a non-positive @pid, a waitpid() failure, or any other exit.
 */
static int wait_for_pid(pid_t pid)
{
	int status;
	pid_t reaped;

	if (pid <= 0)
		return -1;

	for (;;) {
		reaped = waitpid(pid, &status, 0);
		if (reaped == pid)
			break;
		if (reaped == -1 && errno != EINTR)
			return -1;
		/* interrupted, or some other pid reported: wait again */
	}

	return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
}
385
b11c6ec0
SH
386
387/*
388 * append pid to *src.
389 * src: a pointer to a char* in which ot append the pid.
390 * sz: the number of characters printed so far, minus trailing \0.
391 * asz: the allocated size so far
392 * pid: the pid to append
393 */
394static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
395{
396 char tmp[30];
397
398 int tmplen = sprintf(tmp, "%d\n", (int)pid);
399
400 if (!*src || tmplen + *sz + 1 >= *asz) {
401 char *tmp;
402 do {
403 tmp = realloc(*src, *asz + BUF_RESERVE_SIZE);
404 } while (!tmp);
405 *src = tmp;
406 *asz += BUF_RESERVE_SIZE;
407 }
408 memcpy((*src) +*sz , tmp, tmplen);
409 *sz += tmplen;
410 (*src)[*sz] = '\0';
411}
412
053a659d
SH
/*
 * Given an open FILE * for /proc/pid/{u,g}id_map, and an id valid in the
 * caller's namespace, return that id as mapped into pid's namespace.
 *
 * The file is rewound and scanned line by line; lines that do not parse
 * as three unsigned numbers are skipped.
 * Returns the mapped id, or -1 (as unsigned int) if no range contains
 * in_id or a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	unsigned int ns_base;	/* first id of the range, in the idfile's namespace */
	unsigned int host_base;	/* first id of the range, in the caller's namespace */
	unsigned int range;	/* number of ids in the range */
	char line[400];

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, 400, idfile) != NULL) {
		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range) != 3)
			continue;

		if (host_base + range < host_base || ns_base + range < ns_base) {
			/*
			 * ids wrapped around - unexpected as this is a procfile,
			 * so just bail.
			 */
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				ns_base, host_base, range, line);
			return -1;
		}

		if (in_id < host_base || in_id >= host_base + range)
			continue;

		/*
		 * host_base <= in_id < host_base+range and neither range end
		 * wraps, so ns_base + (in_id - host_base) cannot wrap either.
		 */
		return ns_base + (in_id - host_base);
	}

	/* no answer found */
	return -1;
}
456
341b21ad
SH
457/*
458 * for is_privileged_over,
459 * specify whether we require the calling uid to be root in his
460 * namespace
461 */
462#define NS_ROOT_REQD true
463#define NS_ROOT_OPT false
464
2c51f8dd
SH
465#define PROCLEN 100
466
341b21ad 467static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
758ad80c 468{
2c51f8dd
SH
469 char fpath[PROCLEN];
470 int ret;
053a659d
SH
471 bool answer = false;
472 uid_t nsuid;
473
341b21ad
SH
474 if (victim == -1 || uid == -1)
475 return false;
476
477 /*
478 * If the request is one not requiring root in the namespace,
479 * then having the same uid suffices. (i.e. uid 1000 has write
480 * access to files owned by uid 1000
481 */
482 if (!req_ns_root && uid == victim)
758ad80c
SH
483 return true;
484
2c51f8dd
SH
485 ret = snprintf(fpath, PROCLEN, "/proc/%d/uid_map", pid);
486 if (ret < 0 || ret >= PROCLEN)
487 return false;
053a659d
SH
488 FILE *f = fopen(fpath, "r");
489 if (!f)
490 return false;
491
341b21ad 492 /* if caller's not root in his namespace, reject */
053a659d
SH
493 nsuid = convert_id_to_ns(f, uid);
494 if (nsuid)
495 goto out;
496
341b21ad
SH
497 /*
498 * If victim is not mapped into caller's ns, reject.
499 * XXX I'm not sure this check is needed given that fuse
500 * will be sending requests where the vfs has converted
501 */
053a659d
SH
502 nsuid = convert_id_to_ns(f, victim);
503 if (nsuid == -1)
504 goto out;
505
506 answer = true;
507
508out:
509 fclose(f);
510 return answer;
758ad80c
SH
511}
512
/*
 * Check whether the "other" permission bits of @fmode grant the access
 * requested by the O_ACCMODE part of @req_mode.  Callers shift @fmode to
 * test the owner or group bits.  An unrecognised access mode yields false.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
532
a8b6c3e0
SH
533
/*
 * taskcg is /a/b/c/d/e
 * querycg is /a/b/c
 * we return 'd'
 *
 * i.e. the first path component of taskcg below querycg, as a newly
 * allocated string the caller must free.  Returns NULL if taskcg is not
 * longer than querycg or if allocation fails.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t querylen = strlen(querycg);
	size_t skip;
	char *next, *slash;

	if (strlen(taskcg) <= querylen) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* step over the query prefix and the '/' that follows it */
	skip = (strcmp(querycg, "/") == 0) ? 1 : querylen + 1;

	next = strdup(taskcg + skip);
	if (!next)
		return NULL;

	slash = strchr(next, '/');
	if (slash)
		*slash = '\0';
	return next;
}
559
2c51f8dd
SH
/* Remove a single trailing '\n' from @x in place, if there is one. */
static void stripnewline(char *x)
{
	size_t len = strlen(x);

	if (len == 0)
		return;
	if (x[len - 1] != '\n')
		return;
	x[len - 1] = '\0';
}
566
567static char *get_pid_cgroup(pid_t pid, const char *contrl)
568{
569 char fnam[PROCLEN];
570 FILE *f;
571 char *answer = NULL;
572 char *line = NULL;
573 size_t len = 0;
574 int ret;
777dd831
SH
575 const char *h = find_mounted_controller(contrl);
576 if (!h)
577 return NULL;
2c51f8dd
SH
578
579 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
580 if (ret < 0 || ret >= PROCLEN)
581 return NULL;
582 if (!(f = fopen(fnam, "r")))
583 return NULL;
584
585 while (getline(&line, &len, f) != -1) {
586 char *c1, *c2;
587 if (!line[0])
588 continue;
589 c1 = strchr(line, ':');
590 if (!c1)
591 goto out;
592 c1++;
593 c2 = strchr(c1, ':');
594 if (!c2)
595 goto out;
596 *c2 = '\0';
777dd831 597 if (strcmp(c1, h) != 0)
2c51f8dd
SH
598 continue;
599 c2++;
600 stripnewline(c2);
601 do {
602 answer = strdup(c2);
603 } while (!answer);
604 break;
605 }
606
607out:
608 fclose(f);
609 free(line);
610 return answer;
611}
612
758ad80c
SH
613/*
614 * check whether a fuse context may access a cgroup dir or file
615 *
616 * If file is not null, it is a cgroup file to check under cg.
617 * If file is null, then we are checking perms on cg itself.
618 *
619 * For files we can check the mode of the list_keys result.
620 * For cgroups, we must make assumptions based on the files under the
621 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
622 * yet.
623 */
624static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
625{
35482f91 626 struct cgfs_files *k = NULL;
2c51f8dd 627 bool ret = false;
758ad80c 628
35482f91
SH
629 k = cgfs_get_key(contrl, cg, file);
630 if (!k)
758ad80c 631 return false;
35482f91
SH
632
633 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
634 if (perms_include(k->mode >> 6, mode)) {
635 ret = true;
2c51f8dd 636 goto out;
758ad80c
SH
637 }
638 }
35482f91
SH
639 if (fc->gid == k->gid) {
640 if (perms_include(k->mode >> 3, mode)) {
641 ret = true;
642 goto out;
643 }
644 }
645 ret = perms_include(k->mode, mode);
758ad80c 646
2c51f8dd 647out:
35482f91 648 free_key(k);
2c51f8dd 649 return ret;
3db25a35
SH
650}
651
04b5cbdc
SH
#define INITSCOPE "/init.scope"
/*
 * Strip a trailing "/init.scope" from @cg in place.  If @cg is exactly
 * "/init.scope" it becomes "/"; strings that do not end in "/init.scope"
 * (including ones shorter than it) are left untouched.
 */
static void prune_init_slice(char *cg)
{
	size_t cg_len = strlen(cg);
	size_t scope_len = strlen(INITSCOPE);
	char *point;

	/* compare lengths first: never form a pointer before the start of cg */
	if (cg_len < scope_len)
		return;

	point = cg + (cg_len - scope_len);
	if (strcmp(point, INITSCOPE) != 0)
		return;

	if (point == cg)
		*(point + 1) = '\0';	/* keep the leading '/' */
	else
		*point = '\0';
}
666
/*
 * If pid is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
 * If pid is in /a, he may act on /a/b, but not on /b.
 * if the answer is false and nextcg is not NULL, then *nextcg will point
 * to a string containing the next cgroup directory under cg, which must be
 * freed by the caller.
 *
 * Also returns false when pid's cgroup for contrl cannot be determined.
 */
static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
{
	char *taskcg = get_pid_cgroup(pid, contrl);
	const char *cmp;
	bool answer = false;

	if (!taskcg)
		return false;
	prune_init_slice(taskcg);

	/*
	 * callers pass in '/' for root cgroup, otherwise they pass
	 * in a cgroup without leading '/'
	 */
	cmp = (*cg == '/') ? taskcg : taskcg + 1;

	/* the task's cgroup must be a string prefix of cg */
	if (strncmp(cmp, cg, strlen(cmp)) == 0)
		answer = true;
	else if (nextcg)
		*nextcg = get_next_cgroup_dir(cmp, cg);

	free(taskcg);
	return answer;
}
701
/*
 * If pid is in /a/b/c, he may see that /a exists, but not /b or /a/c.
 *
 * "/" is always visible.  Otherwise cg is visible when the task is in the
 * root cgroup, when cg equals the task's cgroup, or when one of the two
 * is a '/'-delimited prefix of the other.  Returns false when pid's
 * cgroup for contrl cannot be determined.
 */
static bool caller_may_see_dir(pid_t pid, const char *contrl, const char *cg)
{
	char *rawcg;
	const char *task_cg;
	size_t target_len, task_len;
	bool answer = false;

	if (strcmp(cg, "/") == 0)
		return true;

	rawcg = get_pid_cgroup(pid, contrl);
	if (!rawcg)
		return false;
	prune_init_slice(rawcg);

	task_cg = rawcg + 1;	/* skip the leading '/' */
	target_len = strlen(cg);
	task_len = strlen(task_cg);

	if (task_len == 0) {
		/* Task is in the root cg, it can see everything.  This case is
		 * not handled by the strncmps below, since they test for the
		 * last /, but that is the first / that we've chopped off
		 * above.
		 */
		answer = true;
	} else if (strcmp(cg, task_cg) == 0) {
		answer = true;
	} else if (target_len < task_len) {
		/* looking up a parent dir */
		answer = strncmp(task_cg, cg, target_len) == 0 &&
			 task_cg[target_len] == '/';
	} else if (target_len > task_len) {
		/* looking up a child dir */
		answer = strncmp(task_cg, cg, task_len) == 0 &&
			 cg[task_len] == '/';
	}

	free(rawcg);
	return answer;
}
752
758ad80c 753/*
2c51f8dd
SH
754 * given /cgroup/freezer/a/b, return "freezer".
755 * the returned char* should NOT be freed.
758ad80c
SH
756 */
757static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
758{
759 const char *p1;
2c51f8dd 760 char *contr, *slash;
758ad80c
SH
761
762 if (strlen(path) < 9)
763 return NULL;
ac5d9d48
SH
764 if (*(path+7) != '/')
765 return NULL;
758ad80c 766 p1 = path+8;
2c51f8dd
SH
767 contr = strdupa(p1);
768 if (!contr)
769 return NULL;
770 slash = strstr(contr, "/");
758ad80c
SH
771 if (slash)
772 *slash = '\0';
773
758ad80c 774 int i;
35482f91
SH
775 for (i = 0; i < num_hierarchies; i++) {
776 if (hierarchies[i] && strcmp(hierarchies[i], contr) == 0)
777 return hierarchies[i];
758ad80c 778 }
758ad80c
SH
779 return NULL;
780}
781
/*
 * Find the start of cgroup in /cgroup/controller/the/cgroup/path
 * Note that the returned value may include files (keynames) etc
 *
 * Returns a pointer into @path just past the first '/' found at or after
 * offset 8, or NULL if the path is shorter than 9 characters or has no
 * such '/'.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *sep;

	if (strlen(path) < 9)
		return NULL;

	sep = strchr(path + 8, '/');
	return sep ? sep + 1 : NULL;
}
797
/*
 * split the last path element from the path in @cg.
 * @dir is newly allocated and should be freed, @last not: it points at
 * the final '/' inside @cg itself, or is NULL when @cg contains no '/'
 * (in which case @dir is a full copy of @cg).
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **last)
{
	char *copy = NULL;
	char *slash;

	while (!copy)
		copy = strdup(cg);
	*dir = copy;

	slash = strrchr(cg, '/');
	*last = slash;
	if (!slash)
		return;

	/* cut the copy at the same offset as the last '/' in cg */
	copy[slash - cg] = '\0';
}
817
818/*
2ad6d2bd 819 * FUSE ops for /cgroup
758ad80c 820 */
2ad6d2bd 821
758ad80c
SH
822static int cg_getattr(const char *path, struct stat *sb)
823{
824 struct timespec now;
825 struct fuse_context *fc = fuse_get_context();
2c51f8dd 826 char * cgdir = NULL;
febf2b87 827 char *last = NULL, *path1, *path2;
35482f91 828 struct cgfs_files *k = NULL;
758ad80c 829 const char *cgroup;
2c51f8dd
SH
830 const char *controller = NULL;
831 int ret = -ENOENT;
758ad80c
SH
832
833
834 if (!fc)
835 return -EIO;
836
837 memset(sb, 0, sizeof(struct stat));
838
839 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
840 return -EINVAL;
841
842 sb->st_uid = sb->st_gid = 0;
843 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
844 sb->st_size = 0;
845
846 if (strcmp(path, "/cgroup") == 0) {
847 sb->st_mode = S_IFDIR | 00755;
848 sb->st_nlink = 2;
849 return 0;
850 }
851
852 controller = pick_controller_from_path(fc, path);
853 if (!controller)
854 return -EIO;
758ad80c
SH
855 cgroup = find_cgroup_in_path(path);
856 if (!cgroup) {
857 /* this is just /cgroup/controller, return it as a dir */
858 sb->st_mode = S_IFDIR | 00755;
859 sb->st_nlink = 2;
860 return 0;
861 }
341b21ad 862
febf2b87 863 get_cgdir_and_path(cgroup, &cgdir, &last);
758ad80c 864
febf2b87 865 if (!last) {
758ad80c
SH
866 path1 = "/";
867 path2 = cgdir;
868 } else {
869 path1 = cgdir;
febf2b87 870 path2 = last;
758ad80c
SH
871 }
872
b11c6ec0 873 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
874 if (initpid <= 0)
875 initpid = fc->pid;
758ad80c 876 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
febf2b87
SH
877 * Then check that caller's cgroup is under path if last is a child
878 * cgroup, or cgdir if last is a file */
758ad80c
SH
879
880 if (is_child_cgroup(controller, path1, path2)) {
0dcc31ea 881 if (!caller_may_see_dir(initpid, controller, cgroup)) {
a8b6c3e0
SH
882 ret = -ENOENT;
883 goto out;
884 }
0dcc31ea 885 if (!caller_is_in_ancestor(initpid, controller, cgroup, NULL)) {
f9a05025
SH
886 /* this is just /cgroup/controller, return it as a dir */
887 sb->st_mode = S_IFDIR | 00555;
888 sb->st_nlink = 2;
2c51f8dd
SH
889 ret = 0;
890 goto out;
891 }
892 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
893 ret = -EACCES;
894 goto out;
f9a05025 895 }
758ad80c 896
053a659d
SH
897 // get uid, gid, from '/tasks' file and make up a mode
898 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
899 sb->st_mode = S_IFDIR | 00755;
febf2b87 900 k = cgfs_get_key(controller, cgroup, NULL);
053a659d 901 if (!k) {
053a659d
SH
902 sb->st_uid = sb->st_gid = 0;
903 } else {
053a659d
SH
904 sb->st_uid = k->uid;
905 sb->st_gid = k->gid;
906 }
2c51f8dd 907 free_key(k);
758ad80c 908 sb->st_nlink = 2;
2c51f8dd
SH
909 ret = 0;
910 goto out;
758ad80c
SH
911 }
912
35482f91 913 if ((k = cgfs_get_key(controller, path1, path2)) != NULL) {
758ad80c 914 sb->st_mode = S_IFREG | k->mode;
053a659d 915 sb->st_nlink = 1;
758ad80c
SH
916 sb->st_uid = k->uid;
917 sb->st_gid = k->gid;
7253e0a4 918 sb->st_size = 0;
2c51f8dd 919 free_key(k);
0dcc31ea 920 if (!caller_is_in_ancestor(initpid, controller, path1, NULL)) {
adc3867b
SH
921 ret = -ENOENT;
922 goto out;
923 }
924 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) {
925 ret = -EACCES;
926 goto out;
927 }
2c51f8dd
SH
928
929 ret = 0;
758ad80c
SH
930 }
931
2c51f8dd
SH
932out:
933 free(cgdir);
934 return ret;
758ad80c 935}
2183082c 936
758ad80c 937static int cg_opendir(const char *path, struct fuse_file_info *fi)
2183082c 938{
7f163b71 939 struct fuse_context *fc = fuse_get_context();
7f163b71 940 const char *cgroup;
c688e1b3 941 struct file_info *dir_info;
2c51f8dd 942 char *controller = NULL;
7f163b71
SH
943
944 if (!fc)
945 return -EIO;
946
c688e1b3
SH
947 if (strcmp(path, "/cgroup") == 0) {
948 cgroup = NULL;
949 controller = NULL;
950 } else {
951 // return list of keys for the controller, and list of child cgroups
952 controller = pick_controller_from_path(fc, path);
953 if (!controller)
954 return -EIO;
7f163b71 955
c688e1b3
SH
956 cgroup = find_cgroup_in_path(path);
957 if (!cgroup) {
958 /* this is just /cgroup/controller, return its contents */
959 cgroup = "/";
960 }
7f163b71
SH
961 }
962
b11c6ec0 963 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
964 if (initpid <= 0)
965 initpid = fc->pid;
a8b6c3e0 966 if (cgroup) {
0dcc31ea 967 if (!caller_may_see_dir(initpid, controller, cgroup))
a8b6c3e0
SH
968 return -ENOENT;
969 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
970 return -EACCES;
2c51f8dd 971 }
c688e1b3
SH
972
973 /* we'll free this at cg_releasedir */
2c51f8dd
SH
974 dir_info = malloc(sizeof(*dir_info));
975 if (!dir_info)
976 return -ENOMEM;
35482f91
SH
977 dir_info->controller = must_copy_string(controller);
978 dir_info->cgroup = must_copy_string(cgroup);
443d13f5 979 dir_info->type = LXC_TYPE_CGDIR;
c688e1b3 980 dir_info->buf = NULL;
8f6e8f5e 981 dir_info->file = NULL;
c688e1b3
SH
982 dir_info->buflen = 0;
983
984 fi->fh = (unsigned long)dir_info;
758ad80c
SH
985 return 0;
986}
987
758ad80c
SH
988static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
989 struct fuse_file_info *fi)
990{
c688e1b3 991 struct file_info *d = (struct file_info *)fi->fh;
35482f91 992 struct cgfs_files **list = NULL;
2c51f8dd
SH
993 int i, ret;
994 char *nextcg = NULL;
758ad80c 995 struct fuse_context *fc = fuse_get_context();
2c51f8dd 996 char **clist = NULL;
758ad80c 997
443d13f5 998 if (d->type != LXC_TYPE_CGDIR) {
b845ad01
SH
999 fprintf(stderr, "Internal error: file cache info used in readdir\n");
1000 return -EIO;
1001 }
c688e1b3
SH
1002 if (!d->cgroup && !d->controller) {
1003 // ls /var/lib/lxcfs/cgroup - just show list of controllers
758ad80c
SH
1004 int i;
1005
35482f91
SH
1006 for (i = 0; i < num_hierarchies; i++) {
1007 if (hierarchies[i] && filler(buf, hierarchies[i], NULL, 0) != 0) {
758ad80c
SH
1008 return -EIO;
1009 }
1010 }
1011 return 0;
1012 }
1013
35482f91 1014 if (!cgfs_list_keys(d->controller, d->cgroup, &list)) {
3db25a35 1015 // not a valid cgroup
2c51f8dd
SH
1016 ret = -EINVAL;
1017 goto out;
1018 }
3db25a35 1019
b11c6ec0 1020 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
1021 if (initpid <= 0)
1022 initpid = fc->pid;
0dcc31ea 1023 if (!caller_is_in_ancestor(initpid, d->controller, d->cgroup, &nextcg)) {
3db25a35
SH
1024 if (nextcg) {
1025 int ret;
1026 ret = filler(buf, nextcg, NULL, 0);
2c51f8dd
SH
1027 free(nextcg);
1028 if (ret != 0) {
1029 ret = -EIO;
1030 goto out;
1031 }
3db25a35 1032 }
2c51f8dd
SH
1033 ret = 0;
1034 goto out;
3db25a35
SH
1035 }
1036
758ad80c 1037 for (i = 0; list[i]; i++) {
758ad80c 1038 if (filler(buf, list[i]->name, NULL, 0) != 0) {
2c51f8dd
SH
1039 ret = -EIO;
1040 goto out;
758ad80c
SH
1041 }
1042 }
1043
1044 // now get the list of child cgroups
758ad80c 1045
35482f91 1046 if (!cgfs_list_children(d->controller, d->cgroup, &clist)) {
2c51f8dd
SH
1047 ret = 0;
1048 goto out;
1049 }
758ad80c 1050 for (i = 0; clist[i]; i++) {
758ad80c 1051 if (filler(buf, clist[i], NULL, 0) != 0) {
2c51f8dd
SH
1052 ret = -EIO;
1053 goto out;
758ad80c
SH
1054 }
1055 }
2c51f8dd
SH
1056 ret = 0;
1057
1058out:
1059 free_keys(list);
1060 if (clist) {
1061 for (i = 0; clist[i]; i++)
1062 free(clist[i]);
1063 free(clist);
1064 }
1065 return ret;
758ad80c
SH
1066}
1067
8f6e8f5e
SH
1068static void do_release_file_info(struct file_info *f)
1069{
2c51f8dd
SH
1070 if (!f)
1071 return;
1072 free(f->controller);
1073 free(f->cgroup);
1074 free(f->file);
1075 free(f->buf);
1076 free(f);
8f6e8f5e
SH
1077}
1078
758ad80c
SH
1079static int cg_releasedir(const char *path, struct fuse_file_info *fi)
1080{
c688e1b3
SH
1081 struct file_info *d = (struct file_info *)fi->fh;
1082
8f6e8f5e 1083 do_release_file_info(d);
758ad80c
SH
1084 return 0;
1085}
1086
99978832
SH
1087static int cg_open(const char *path, struct fuse_file_info *fi)
1088{
99978832 1089 const char *cgroup;
febf2b87 1090 char *last = NULL, *path1, *path2, * cgdir = NULL, *controller;
35482f91 1091 struct cgfs_files *k = NULL;
8f6e8f5e 1092 struct file_info *file_info;
99978832 1093 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1094 int ret;
99978832
SH
1095
1096 if (!fc)
1097 return -EIO;
1098
1099 controller = pick_controller_from_path(fc, path);
1100 if (!controller)
1101 return -EIO;
1102 cgroup = find_cgroup_in_path(path);
1103 if (!cgroup)
1104 return -EINVAL;
1105
febf2b87
SH
1106 get_cgdir_and_path(cgroup, &cgdir, &last);
1107 if (!last) {
99978832
SH
1108 path1 = "/";
1109 path2 = cgdir;
1110 } else {
1111 path1 = cgdir;
febf2b87 1112 path2 = last;
99978832
SH
1113 }
1114
35482f91 1115 k = cgfs_get_key(controller, path1, path2);
2c51f8dd
SH
1116 if (!k) {
1117 ret = -EINVAL;
1118 goto out;
1119 }
1120 free_key(k);
99978832 1121
b11c6ec0 1122 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
1123 if (initpid <= 0)
1124 initpid = fc->pid;
0dcc31ea 1125 if (!caller_may_see_dir(initpid, controller, path1)) {
a8b6c3e0
SH
1126 ret = -ENOENT;
1127 goto out;
1128 }
2c51f8dd 1129 if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
8f6e8f5e 1130 // should never get here
2c51f8dd
SH
1131 ret = -EACCES;
1132 goto out;
1133 }
99978832 1134
8f6e8f5e 1135 /* we'll free this at cg_release */
2c51f8dd
SH
1136 file_info = malloc(sizeof(*file_info));
1137 if (!file_info) {
1138 ret = -ENOMEM;
1139 goto out;
1140 }
35482f91
SH
1141 file_info->controller = must_copy_string(controller);
1142 file_info->cgroup = must_copy_string(path1);
1143 file_info->file = must_copy_string(path2);
443d13f5 1144 file_info->type = LXC_TYPE_CGFILE;
8f6e8f5e
SH
1145 file_info->buf = NULL;
1146 file_info->buflen = 0;
1147
1148 fi->fh = (unsigned long)file_info;
2c51f8dd
SH
1149 ret = 0;
1150
1151out:
1152 free(cgdir);
1153 return ret;
8f6e8f5e
SH
1154}
1155
1156static int cg_release(const char *path, struct fuse_file_info *fi)
1157{
1158 struct file_info *f = (struct file_info *)fi->fh;
1159
1160 do_release_file_info(f);
1161 return 0;
99978832
SH
1162}
1163
5b2dfd85
SH
#define POLLIN_SET ( EPOLLIN | EPOLLHUP | EPOLLRDHUP )

/*
 * Wait until @sock reports one of the POLLIN_SET events, for at most
 * roughly @timeout seconds measured with time().  Interrupted waits are
 * restarted with the remaining time.
 * Returns true if an event was reported.  Returns false on timeout or on
 * any failure; errno is set to 0 when the deadline had already passed
 * before a wait, and is otherwise whatever epoll_wait() left behind.
 */
static bool wait_for_sock(int sock, int timeout)
{
	struct epoll_event ev;
	int epfd, nready, now, starttime, remaining, saved_errno;

	starttime = time(NULL);
	if (starttime < 0)
		return false;

	epfd = epoll_create(1);
	if (epfd < 0) {
		fprintf(stderr, "Failed to create epoll socket: %m\n");
		return false;
	}

	ev.events = POLLIN_SET;
	ev.data.fd = sock;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, sock, &ev) < 0) {
		fprintf(stderr, "Failed adding socket to epoll: %m\n");
		close(epfd);
		return false;
	}

	for (;;) {
		now = time(NULL);
		if (now < 0) {
			close(epfd);
			return false;
		}

		remaining = (starttime + timeout) - now;
		if (remaining < 0) { // timeout
			errno = 0;
			close(epfd);
			return false;
		}

		nready = epoll_wait(epfd, &ev, 1, 1000*remaining + 1);
		if (nready >= 0 || errno != EINTR)
			break;
		/* interrupted by a signal: recompute the remaining time */
	}

	/* close() may clobber errno, so keep epoll_wait's value */
	saved_errno = errno;
	close(epfd);

	if (nready <= 0) {
		errno = saved_errno;
		return false;
	}
	return true;
}
a05660a6 1211
5b2dfd85
SH
/*
 * msgrecv - non-blocking recv() preceded by a bounded wait.
 *
 * Waits up to 2 seconds (via wait_for_sock) for @sockfd to have something
 * to read, then receives at most @len bytes into @buf.
 * Returns what recv() returned, or -1 if the wait failed or timed out.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	bool ready = wait_for_sock(sockfd, 2);

	return ready ? recv(sockfd, buf, len, MSG_DONTWAIT) : -1;
}
1218
01e71852 1219static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
a05660a6
SH
1220{
1221 struct msghdr msg = { 0 };
1222 struct iovec iov;
1223 struct cmsghdr *cmsg;
1224 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
1225 char buf[1];
1226 buf[0] = 'p';
1227
01e71852
SH
1228 if (pingfirst) {
1229 if (msgrecv(sock, buf, 1) != 1) {
1420baf8 1230 fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
01e71852
SH
1231 __func__);
1232 return SEND_CREDS_FAIL;
1233 }
a05660a6
SH
1234 }
1235
1236 msg.msg_control = cmsgbuf;
1237 msg.msg_controllen = sizeof(cmsgbuf);
1238
1239 cmsg = CMSG_FIRSTHDR(&msg);
1240 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
1241 cmsg->cmsg_level = SOL_SOCKET;
1242 cmsg->cmsg_type = SCM_CREDENTIALS;
1243 memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));
1244
1245 msg.msg_name = NULL;
1246 msg.msg_namelen = 0;
1247
1248 buf[0] = v;
1249 iov.iov_base = buf;
1250 iov.iov_len = sizeof(buf);
1251 msg.msg_iov = &iov;
1252 msg.msg_iovlen = 1;
1253
1254 if (sendmsg(sock, &msg, 0) < 0) {
1420baf8 1255 fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
a05660a6
SH
1256 strerror(errno));
1257 if (errno == 3)
01e71852
SH
1258 return SEND_CREDS_NOTSK;
1259 return SEND_CREDS_FAIL;
a05660a6
SH
1260 }
1261
01e71852 1262 return SEND_CREDS_OK;
a05660a6
SH
1263}
1264
1265static bool recv_creds(int sock, struct ucred *cred, char *v)
1266{
1267 struct msghdr msg = { 0 };
1268 struct iovec iov;
1269 struct cmsghdr *cmsg;
1270 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
1271 char buf[1];
1272 int ret;
1273 int optval = 1;
1274
1275 *v = '1';
1276
1277 cred->pid = -1;
1278 cred->uid = -1;
1279 cred->gid = -1;
1280
1281 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
1420baf8 1282 fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
a05660a6
SH
1283 return false;
1284 }
1285 buf[0] = '1';
1286 if (write(sock, buf, 1) != 1) {
1420baf8 1287 fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
a05660a6
SH
1288 return false;
1289 }
1290
1291 msg.msg_name = NULL;
1292 msg.msg_namelen = 0;
1293 msg.msg_control = cmsgbuf;
1294 msg.msg_controllen = sizeof(cmsgbuf);
1295
1296 iov.iov_base = buf;
1297 iov.iov_len = sizeof(buf);
1298 msg.msg_iov = &iov;
1299 msg.msg_iovlen = 1;
1300
5b2dfd85
SH
1301 if (!wait_for_sock(sock, 2)) {
1302 fprintf(stderr, "Timed out waiting for scm_cred: %s\n",
6ee867dc
SH
1303 strerror(errno));
1304 return false;
1305 }
1306 ret = recvmsg(sock, &msg, MSG_DONTWAIT);
a05660a6 1307 if (ret < 0) {
1420baf8 1308 fprintf(stderr, "Failed to receive scm_cred: %s\n",
a05660a6
SH
1309 strerror(errno));
1310 return false;
1311 }
1312
1313 cmsg = CMSG_FIRSTHDR(&msg);
1314
1315 if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
1316 cmsg->cmsg_level == SOL_SOCKET &&
1317 cmsg->cmsg_type == SCM_CREDENTIALS) {
1318 memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));
1319 }
1320 *v = buf[0];
1321
1322 return true;
1323}
1324
1325
1326/*
4775fba1
SH
1327 * pid_to_ns - reads pids from a ucred over a socket, then writes the
1328 * int value back over the socket. This shifts the pid from the
1329 * sender's pidns into tpid's pidns.
a05660a6 1330 */
4775fba1 1331static void pid_to_ns(int sock, pid_t tpid)
a05660a6
SH
1332{
1333 char v = '0';
1334 struct ucred cred;
1335
1336 while (recv_creds(sock, &cred, &v)) {
1337 if (v == '1')
67bd113f 1338 _exit(0);
a05660a6 1339 if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
67bd113f 1340 _exit(1);
a05660a6 1341 }
67bd113f 1342 _exit(0);
a05660a6
SH
1343}
1344
/*
 * pid_to_ns_wrapper - enter @tpid's pid namespace and run pid_to_ns there.
 *
 * After setns() into a pid namespace the caller itself stays in its old
 * one; only children forked afterwards live in the target namespace.  So
 * this function does the setns(), forks a child that performs the actual
 * pid conversion on @sock, and waits for it.
 *
 * The child acknowledges that it is running by writing '1' to a pipe; the
 * parent gives it one second to do so.  Never returns: exits 0 when the
 * child ran and wait_for_pid() succeeded, 1 on any failure.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	char nspath[100], ack;
	int nsfd, len, ackpipe[2];
	pid_t child;

	len = snprintf(nspath, sizeof(nspath), "/proc/%d/ns/pid", tpid);
	if (len < 0 || len >= sizeof(nspath))
		_exit(1);

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0)
		_exit(1);
	if (setns(nsfd, 0) < 0)
		_exit(1);
	close(nsfd);

	if (pipe(ackpipe) < 0)
		_exit(1);

	child = fork();
	if (child < 0)
		_exit(1);

	if (child == 0) {
		char b = '1';

		close(ackpipe[0]);
		if (write(ackpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): erorr on write: %s\n",
				__func__, strerror(errno));
		}
		close(ackpipe[1]);
		pid_to_ns(sock, tpid);
		_exit(1); // not reached
	}

	// the child has one second to finish forking and send its ack
	if (!wait_for_sock(ackpipe[0], 1))
		_exit(1);
	if (read(ackpipe[0], &ack, 1) != sizeof(char) || ack != '1')
		_exit(1);

	_exit(wait_for_pid(child) ? 0 : 1);
}
1398
1399/*
1400 * To read cgroup files with a particular pid, we will setns into the child
1401 * pidns, open a pipe, fork a child - which will be the first to really be in
35482f91 1402 * the child ns - which does the cgfs_get_value and writes the data to the pipe.
a05660a6
SH
1403 */
1404static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
1405{
1406 int sock[2] = {-1, -1};
2c51f8dd 1407 char *tmpdata = NULL;
a05660a6
SH
1408 int ret;
1409 pid_t qpid, cpid = -1;
1410 bool answer = false;
1411 char v = '0';
1412 struct ucred cred;
2c51f8dd 1413 size_t sz = 0, asz = 0;
a05660a6 1414
35482f91 1415 if (!cgfs_get_value(contrl, cg, file, &tmpdata))
a05660a6
SH
1416 return false;
1417
1418 /*
1419 * Now we read the pids from returned data one by one, pass
1420 * them into a child in the target namespace, read back the
1421 * translated pids, and put them into our to-return data
1422 */
1423
1424 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1425 perror("socketpair");
2c51f8dd
SH
1426 free(tmpdata);
1427 return false;
a05660a6
SH
1428 }
1429
1430 cpid = fork();
1431 if (cpid == -1)
1432 goto out;
1433
ff96a5f9 1434 if (!cpid) // child - exits when done
4775fba1 1435 pid_to_ns_wrapper(sock[1], tpid);
a05660a6
SH
1436
1437 char *ptr = tmpdata;
1438 cred.uid = 0;
1439 cred.gid = 0;
1440 while (sscanf(ptr, "%d\n", &qpid) == 1) {
1441 cred.pid = qpid;
01e71852
SH
1442 ret = send_creds(sock[0], &cred, v, true);
1443
1444 if (ret == SEND_CREDS_NOTSK)
1445 goto next;
1446 if (ret == SEND_CREDS_FAIL)
a05660a6
SH
1447 goto out;
1448
1449 // read converted results
5b2dfd85
SH
1450 if (!wait_for_sock(sock[0], 2)) {
1451 fprintf(stderr, "%s: timed out waiting for pid from child: %s\n",
6ee867dc 1452 __func__, strerror(errno));
a05660a6
SH
1453 goto out;
1454 }
1455 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1456 fprintf(stderr, "%s: error reading pid from child: %s\n",
1457 __func__, strerror(errno));
a05660a6
SH
1458 goto out;
1459 }
2c51f8dd 1460 must_strcat_pid(d, &sz, &asz, qpid);
01e71852 1461next:
a05660a6
SH
1462 ptr = strchr(ptr, '\n');
1463 if (!ptr)
1464 break;
1465 ptr++;
1466 }
1467
1468 cred.pid = getpid();
1469 v = '1';
01e71852 1470 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
a05660a6 1471 // failed to ask child to exit
6ee867dc
SH
1472 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
1473 __func__, strerror(errno));
a05660a6
SH
1474 goto out;
1475 }
1476
1477 answer = true;
1478
1479out:
2c51f8dd 1480 free(tmpdata);
a05660a6
SH
1481 if (cpid != -1)
1482 wait_for_pid(cpid);
1483 if (sock[0] != -1) {
1484 close(sock[0]);
1485 close(sock[1]);
1486 }
1487 return answer;
1488}
1489
99978832
SH
1490static int cg_read(const char *path, char *buf, size_t size, off_t offset,
1491 struct fuse_file_info *fi)
1492{
99978832 1493 struct fuse_context *fc = fuse_get_context();
8f6e8f5e 1494 struct file_info *f = (struct file_info *)fi->fh;
35482f91 1495 struct cgfs_files *k = NULL;
2c51f8dd
SH
1496 char *data = NULL;
1497 int ret, s;
1498 bool r;
99978832 1499
443d13f5 1500 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1501 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
1502 return -EIO;
1503 }
1504
99978832 1505 if (offset)
7253e0a4 1506 return 0;
99978832
SH
1507
1508 if (!fc)
1509 return -EIO;
1510
8f6e8f5e 1511 if (!f->controller)
99978832
SH
1512 return -EINVAL;
1513
35482f91 1514 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
2c51f8dd
SH
1515 return -EINVAL;
1516 }
1517 free_key(k);
99978832 1518
99978832 1519
2c51f8dd
SH
1520 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { // should never get here
1521 ret = -EACCES;
1522 goto out;
1523 }
a05660a6 1524
2c51f8dd
SH
1525 if (strcmp(f->file, "tasks") == 0 ||
1526 strcmp(f->file, "/tasks") == 0 ||
1527 strcmp(f->file, "/cgroup.procs") == 0 ||
1528 strcmp(f->file, "cgroup.procs") == 0)
1529 // special case - we have to translate the pids
1530 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
1531 else
35482f91 1532 r = cgfs_get_value(f->controller, f->cgroup, f->file, &data);
99978832 1533
2c51f8dd
SH
1534 if (!r) {
1535 ret = -EINVAL;
1536 goto out;
1537 }
99978832 1538
2c51f8dd
SH
1539 if (!data) {
1540 ret = 0;
1541 goto out;
99978832 1542 }
2c51f8dd
SH
1543 s = strlen(data);
1544 if (s > size)
1545 s = size;
1546 memcpy(buf, data, s);
1547 if (s > 0 && s < size && data[s-1] != '\n')
1548 buf[s++] = '\n';
99978832 1549
2c51f8dd
SH
1550 ret = s;
1551
1552out:
1553 free(data);
1554 return ret;
99978832
SH
1555}
1556
4775fba1
SH
1557static void pid_from_ns(int sock, pid_t tpid)
1558{
1559 pid_t vpid;
1560 struct ucred cred;
1561 char v;
6ee867dc 1562 int ret;
4775fba1
SH
1563
1564 cred.uid = 0;
1565 cred.gid = 0;
6ee867dc 1566 while (1) {
5b2dfd85
SH
1567 if (!wait_for_sock(sock, 2)) {
1568 fprintf(stderr, "%s: timeout reading from parent\n", __func__);
67bd113f 1569 _exit(1);
6ee867dc
SH
1570 }
1571 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1572 fprintf(stderr, "%s: bad read from parent: %s\n",
1573 __func__, strerror(errno));
67bd113f 1574 _exit(1);
6ee867dc 1575 }
4775fba1 1576 if (vpid == -1) // done
01e71852 1577 break;
4775fba1
SH
1578 v = '0';
1579 cred.pid = vpid;
01e71852 1580 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
4775fba1
SH
1581 v = '1';
1582 cred.pid = getpid();
01e71852 1583 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
67bd113f 1584 _exit(1);
4775fba1
SH
1585 }
1586 }
67bd113f 1587 _exit(0);
4775fba1
SH
1588}
1589
/*
 * pid_from_ns_wrapper - enter @tpid's pid namespace and run pid_from_ns.
 *
 * Does setns() into @tpid's pid namespace, then forks a child (the first
 * process actually inside that namespace) to run pid_from_ns() on @sock.
 * The child acknowledges startup by writing '1' to a pipe.  If no valid
 * ack arrives within one second the child is killed, reaped, and a new
 * one is forked.
 *
 * Never returns: exits 0 once an acknowledged child has been waited for
 * successfully, 1 on setup failure or if wait_for_pid() fails.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	char nspath[100], ack;
	int nsfd, len, ackpipe[2];
	pid_t child;

	len = snprintf(nspath, sizeof(nspath), "/proc/%d/ns/pid", tpid);
	if (len < 0 || len >= sizeof(nspath))
		_exit(1);

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0)
		_exit(1);
	if (setns(nsfd, 0) < 0)
		_exit(1);
	close(nsfd);

	if (pipe(ackpipe) < 0)
		_exit(1);

	for (;;) {
		child = fork();
		if (child < 0)
			_exit(1);

		if (child == 0) {
			char b = '1';

			close(ackpipe[0]);
			if (write(ackpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(ackpipe[1]);
			pid_from_ns(sock, tpid);
		}

		// the child has one second to finish forking and send its ack
		if (wait_for_sock(ackpipe[0], 1) &&
		    read(ackpipe[0], &ack, 1) == sizeof(char) && ack == '1')
			_exit(wait_for_pid(child) ? 0 : 1);

		/* no usable ack: get rid of this child and try again */
		kill(child, SIGKILL);
		wait_for_pid(child);
	}
}
1645
8ee2a503
SH
/*
 * hostuid_to_ns - map host @uid through /proc/@pid/uid_map.
 *
 * The result of convert_id_to_ns() on that file is stored in *@answer.
 * Returns true if the file could be opened and the stored value is not
 * -1, false otherwise.
 */
bool hostuid_to_ns(uid_t uid, pid_t pid, uid_t *answer)
{
	char mapfile[400];
	FILE *map;

	sprintf(mapfile, "/proc/%d/uid_map", pid);
	map = fopen(mapfile, "r");
	if (map == NULL)
		return false;

	*answer = convert_id_to_ns(map, uid);
	fclose(map);

	return *answer != -1;
}
1667
/*
 * get_pid_creds - look up the uid and gid of @pid in /proc/@pid/status.
 *
 * The first number on the "Uid:" and "Gid:" lines is stored in *@uid and
 * *@gid.  Both start out as -1 and keep that value if the file cannot be
 * opened or the corresponding line is not found.  Parsing stops at the
 * first malformed "Uid:"/"Gid:" line.
 * (XXX should we use euid here?)
 */
void get_pid_creds(pid_t pid, uid_t *uid, gid_t *gid)
{
	char line[400];
	FILE *status;

	*uid = -1;
	*gid = -1;

	sprintf(line, "/proc/%d/status", pid);
	status = fopen(line, "r");
	if (status == NULL) {
		fprintf(stderr, "Error opening %s: %s\n", line, strerror(errno));
		return;
	}

	/* the path buffer is reused for the file's lines from here on */
	while (fgets(line, 400, status) != NULL) {
		if (strncmp(line, "Uid:", 4) == 0) {
			uid_t parsed_uid;

			if (sscanf(line + 4, "%u", &parsed_uid) != 1) {
				fprintf(stderr, "bad uid line for pid %u\n", pid);
				goto out;
			}
			*uid = parsed_uid;
			continue;
		}
		if (strncmp(line, "Gid:", 4) == 0) {
			gid_t parsed_gid;

			if (sscanf(line + 4, "%u", &parsed_gid) != 1) {
				fprintf(stderr, "bad gid line for pid %u\n", pid);
				goto out;
			}
			*gid = parsed_gid;
		}
	}

out:
	fclose(status);
}
1706
/*
 * may_move_pid - may requestor @r (host uid @r_uid) move victim @v to a
 * new cgroup?
 *
 * This is allowed if
 *  . they are the same task,
 *  . @r is root on the host,
 *  . they are owned by the same uid, or
 *  . @r_uid maps to 0 in @r's uid map and @v's uid is mapped there too.
 */
bool may_move_pid(pid_t r, uid_t r_uid, pid_t v)
{
	uid_t v_uid, mapped;
	gid_t v_gid;

	if (r == v || r_uid == 0)
		return true;

	get_pid_creds(v, &v_uid, &v_gid);
	if (r_uid == v_uid)
		return true;

	/* the requestor must be root in its own user namespace ... */
	if (!hostuid_to_ns(r_uid, r, &mapped) || mapped != 0)
		return false;

	/* ... and the victim's uid must be mapped into that namespace */
	return hostuid_to_ns(v_uid, r, &mapped);
}
1732
1733static bool do_write_pids(pid_t tpid, uid_t tuid, const char *contrl, const char *cg,
1734 const char *file, const char *buf)
4775fba1
SH
1735{
1736 int sock[2] = {-1, -1};
1737 pid_t qpid, cpid = -1;
35482f91 1738 FILE *pids_file = NULL;
4775fba1
SH
1739 bool answer = false, fail = false;
1740
35482f91
SH
1741 pids_file = open_pids_file(contrl, cg);
1742 if (!pids_file)
1743 return false;
1744
4775fba1
SH
1745 /*
1746 * write the pids to a socket, have helper in writer's pidns
1747 * call movepid for us
1748 */
1749 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1750 perror("socketpair");
35482f91 1751 goto out;
4775fba1
SH
1752 }
1753
1754 cpid = fork();
1755 if (cpid == -1)
1756 goto out;
1757
35482f91
SH
1758 if (!cpid) { // child
1759 fclose(pids_file);
4775fba1 1760 pid_from_ns_wrapper(sock[1], tpid);
35482f91 1761 }
4775fba1
SH
1762
1763 const char *ptr = buf;
1764 while (sscanf(ptr, "%d", &qpid) == 1) {
1765 struct ucred cred;
1766 char v;
1767
1768 if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1769 fprintf(stderr, "%s: error writing pid to child: %s\n",
1770 __func__, strerror(errno));
4775fba1
SH
1771 goto out;
1772 }
1773
01e71852
SH
1774 if (recv_creds(sock[0], &cred, &v)) {
1775 if (v == '0') {
8ee2a503
SH
1776 if (!may_move_pid(tpid, tuid, cred.pid)) {
1777 fail = true;
1778 break;
1779 }
35482f91 1780 if (fprintf(pids_file, "%d", (int) cred.pid) < 0)
01e71852
SH
1781 fail = true;
1782 }
4775fba1
SH
1783 }
1784
1785 ptr = strchr(ptr, '\n');
1786 if (!ptr)
1787 break;
1788 ptr++;
1789 }
1790
1791 /* All good, write the value */
1792 qpid = -1;
1793 if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
1420baf8 1794 fprintf(stderr, "Warning: failed to ask child to exit\n");
4775fba1
SH
1795
1796 if (!fail)
1797 answer = true;
1798
1799out:
1800 if (cpid != -1)
1801 wait_for_pid(cpid);
1802 if (sock[0] != -1) {
1803 close(sock[0]);
1804 close(sock[1]);
1805 }
35482f91
SH
1806 if (pids_file) {
1807 if (fclose(pids_file) != 0)
1808 answer = false;
1809 }
4775fba1
SH
1810 return answer;
1811}
1812
2ad6d2bd
SH
1813int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1814 struct fuse_file_info *fi)
1815{
2ad6d2bd 1816 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1817 char *localbuf = NULL;
35482f91 1818 struct cgfs_files *k = NULL;
8f6e8f5e 1819 struct file_info *f = (struct file_info *)fi->fh;
2c51f8dd 1820 bool r;
2ad6d2bd 1821
443d13f5 1822 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1823 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
1824 return -EIO;
1825 }
1826
2ad6d2bd 1827 if (offset)
7253e0a4 1828 return 0;
2ad6d2bd
SH
1829
1830 if (!fc)
1831 return -EIO;
1832
2c51f8dd 1833 localbuf = alloca(size+1);
47cbf0e5
SH
1834 localbuf[size] = '\0';
1835 memcpy(localbuf, buf, size);
2ad6d2bd 1836
35482f91 1837 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
2c51f8dd
SH
1838 size = -EINVAL;
1839 goto out;
1840 }
2ad6d2bd 1841
2c51f8dd
SH
1842 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) {
1843 size = -EACCES;
1844 goto out;
1845 }
4775fba1 1846
2c51f8dd
SH
1847 if (strcmp(f->file, "tasks") == 0 ||
1848 strcmp(f->file, "/tasks") == 0 ||
1849 strcmp(f->file, "/cgroup.procs") == 0 ||
1850 strcmp(f->file, "cgroup.procs") == 0)
1851 // special case - we have to translate the pids
8ee2a503 1852 r = do_write_pids(fc->pid, fc->uid, f->controller, f->cgroup, f->file, localbuf);
2c51f8dd 1853 else
35482f91 1854 r = cgfs_set_value(f->controller, f->cgroup, f->file, localbuf);
2ad6d2bd 1855
2c51f8dd
SH
1856 if (!r)
1857 size = -EINVAL;
2ad6d2bd 1858
2c51f8dd
SH
1859out:
1860 free_key(k);
1861 return size;
2ad6d2bd
SH
1862}
1863
341b21ad
SH
1864int cg_chown(const char *path, uid_t uid, gid_t gid)
1865{
1866 struct fuse_context *fc = fuse_get_context();
febf2b87 1867 char *cgdir = NULL, *last = NULL, *path1, *path2, *controller;
35482f91 1868 struct cgfs_files *k = NULL;
341b21ad 1869 const char *cgroup;
2c51f8dd 1870 int ret;
341b21ad
SH
1871
1872 if (!fc)
1873 return -EIO;
1874
1875 if (strcmp(path, "/cgroup") == 0)
1876 return -EINVAL;
1877
1878 controller = pick_controller_from_path(fc, path);
1879 if (!controller)
f9a05025 1880 return -EINVAL;
341b21ad
SH
1881 cgroup = find_cgroup_in_path(path);
1882 if (!cgroup)
1883 /* this is just /cgroup/controller */
1884 return -EINVAL;
1885
febf2b87 1886 get_cgdir_and_path(cgroup, &cgdir, &last);
341b21ad 1887
febf2b87 1888 if (!last) {
341b21ad
SH
1889 path1 = "/";
1890 path2 = cgdir;
1891 } else {
1892 path1 = cgdir;
febf2b87 1893 path2 = last;
341b21ad
SH
1894 }
1895
1896 if (is_child_cgroup(controller, path1, path2)) {
1897 // get uid, gid, from '/tasks' file and make up a mode
1898 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
35482f91 1899 k = cgfs_get_key(controller, cgroup, "tasks");
341b21ad
SH
1900
1901 } else
35482f91 1902 k = cgfs_get_key(controller, path1, path2);
341b21ad 1903
2c51f8dd
SH
1904 if (!k) {
1905 ret = -EINVAL;
1906 goto out;
1907 }
341b21ad
SH
1908
1909 /*
1910 * This being a fuse request, the uid and gid must be valid
1911 * in the caller's namespace. So we can just check to make
1912 * sure that the caller is root in his uid, and privileged
1913 * over the file's current owner.
1914 */
2c51f8dd
SH
1915 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) {
1916 ret = -EACCES;
1917 goto out;
1918 }
341b21ad 1919
1f69d62e 1920 ret = cgfs_chown_file(controller, cgroup, uid, gid);
2c51f8dd
SH
1921
1922out:
1923 free_key(k);
1924 free(cgdir);
1925
1926 return ret;
341b21ad 1927}
2ad6d2bd 1928
fd2e4e03
SH
1929int cg_chmod(const char *path, mode_t mode)
1930{
0a1bb5ea 1931 struct fuse_context *fc = fuse_get_context();
febf2b87 1932 char * cgdir = NULL, *last = NULL, *path1, *path2, *controller;
35482f91 1933 struct cgfs_files *k = NULL;
0a1bb5ea 1934 const char *cgroup;
2c51f8dd 1935 int ret;
0a1bb5ea
SH
1936
1937 if (!fc)
1938 return -EIO;
1939
1940 if (strcmp(path, "/cgroup") == 0)
1941 return -EINVAL;
1942
1943 controller = pick_controller_from_path(fc, path);
1944 if (!controller)
f9a05025 1945 return -EINVAL;
0a1bb5ea
SH
1946 cgroup = find_cgroup_in_path(path);
1947 if (!cgroup)
1948 /* this is just /cgroup/controller */
1949 return -EINVAL;
1950
febf2b87 1951 get_cgdir_and_path(cgroup, &cgdir, &last);
0a1bb5ea 1952
febf2b87 1953 if (!last) {
0a1bb5ea
SH
1954 path1 = "/";
1955 path2 = cgdir;
1956 } else {
1957 path1 = cgdir;
febf2b87 1958 path2 = last;
0a1bb5ea
SH
1959 }
1960
1961 if (is_child_cgroup(controller, path1, path2)) {
1962 // get uid, gid, from '/tasks' file and make up a mode
1963 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
35482f91 1964 k = cgfs_get_key(controller, cgroup, "tasks");
0a1bb5ea
SH
1965
1966 } else
35482f91 1967 k = cgfs_get_key(controller, path1, path2);
0a1bb5ea 1968
2c51f8dd
SH
1969 if (!k) {
1970 ret = -EINVAL;
1971 goto out;
1972 }
0a1bb5ea
SH
1973
1974 /*
1975 * This being a fuse request, the uid and gid must be valid
1976 * in the caller's namespace. So we can just check to make
1977 * sure that the caller is root in his uid, and privileged
1978 * over the file's current owner.
1979 */
2c51f8dd
SH
1980 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
1981 ret = -EPERM;
1982 goto out;
1983 }
0a1bb5ea 1984
35482f91 1985 if (!cgfs_chmod_file(controller, cgroup, mode)) {
2c51f8dd
SH
1986 ret = -EINVAL;
1987 goto out;
1988 }
1989
1990 ret = 0;
1991out:
1992 free_key(k);
1993 free(cgdir);
1994 return ret;
fd2e4e03
SH
1995}
1996
ab54b798
SH
1997int cg_mkdir(const char *path, mode_t mode)
1998{
1999 struct fuse_context *fc = fuse_get_context();
febf2b87 2000 char *last = NULL, *path1, *cgdir = NULL, *controller, *next = NULL;
ab54b798 2001 const char *cgroup;
2c51f8dd 2002 int ret;
ab54b798 2003
ab54b798
SH
2004 if (!fc)
2005 return -EIO;
2006
2007
2008 controller = pick_controller_from_path(fc, path);
2009 if (!controller)
f9a05025 2010 return -EINVAL;
ab54b798
SH
2011
2012 cgroup = find_cgroup_in_path(path);
2013 if (!cgroup)
f9a05025 2014 return -EINVAL;
ab54b798 2015
febf2b87
SH
2016 get_cgdir_and_path(cgroup, &cgdir, &last);
2017 if (!last)
ab54b798
SH
2018 path1 = "/";
2019 else
2020 path1 = cgdir;
2021
b11c6ec0 2022 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2023 if (initpid <= 0)
2024 initpid = fc->pid;
0dcc31ea 2025 if (!caller_is_in_ancestor(initpid, controller, path1, &next)) {
87dce5f6
SH
2026 if (!next)
2027 ret = -EINVAL;
2028 else if (last && strcmp(next, last) == 0)
a8b6c3e0
SH
2029 ret = -EEXIST;
2030 else
2031 ret = -ENOENT;
2032 goto out;
2033 }
2034
2c51f8dd
SH
2035 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
2036 ret = -EACCES;
2037 goto out;
2038 }
0dcc31ea 2039 if (!caller_is_in_ancestor(initpid, controller, path1, NULL)) {
2c51f8dd
SH
2040 ret = -EACCES;
2041 goto out;
2042 }
ab54b798 2043
af869b9c 2044 ret = cgfs_create(controller, cgroup, fc->uid, fc->gid);
ab54b798 2045
2c51f8dd
SH
2046out:
2047 free(cgdir);
a8b6c3e0 2048 free(next);
2c51f8dd 2049 return ret;
ab54b798
SH
2050}
2051
50d8d5b5
SH
2052static int cg_rmdir(const char *path)
2053{
2054 struct fuse_context *fc = fuse_get_context();
febf2b87 2055 char *last = NULL, *cgdir = NULL, *controller, *next = NULL;
50d8d5b5 2056 const char *cgroup;
2c51f8dd 2057 int ret;
50d8d5b5
SH
2058
2059 if (!fc)
2060 return -EIO;
2061
50d8d5b5
SH
2062 controller = pick_controller_from_path(fc, path);
2063 if (!controller)
f9a05025 2064 return -EINVAL;
50d8d5b5
SH
2065
2066 cgroup = find_cgroup_in_path(path);
2067 if (!cgroup)
f9a05025 2068 return -EINVAL;
50d8d5b5 2069
febf2b87
SH
2070 get_cgdir_and_path(cgroup, &cgdir, &last);
2071 if (!last) {
2c51f8dd
SH
2072 ret = -EINVAL;
2073 goto out;
2074 }
50d8d5b5 2075
b11c6ec0 2076 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2077 if (initpid <= 0)
2078 initpid = fc->pid;
0dcc31ea 2079 if (!caller_is_in_ancestor(initpid, controller, cgroup, &next)) {
febf2b87 2080 if (!last || strcmp(next, last) == 0)
a8b6c3e0
SH
2081 ret = -EBUSY;
2082 else
2083 ret = -ENOENT;
2084 goto out;
2085 }
2086
2c51f8dd
SH
2087 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
2088 ret = -EACCES;
2089 goto out;
2090 }
0dcc31ea 2091 if (!caller_is_in_ancestor(initpid, controller, cgroup, NULL)) {
2c51f8dd
SH
2092 ret = -EACCES;
2093 goto out;
2094 }
50d8d5b5 2095
35482f91 2096 if (!cgfs_remove(controller, cgroup)) {
2c51f8dd
SH
2097 ret = -EINVAL;
2098 goto out;
2099 }
50d8d5b5 2100
2c51f8dd
SH
2101 ret = 0;
2102
2103out:
2104 free(cgdir);
a8b6c3e0 2105 free(next);
2c51f8dd 2106 return ret;
50d8d5b5
SH
2107}
2108
2dc17609
SH
/* Return true if @line begins with the string @pref. */
static bool startswith(const char *line, const char *pref)
{
	size_t preflen = strlen(pref);

	return strncmp(line, pref, preflen) == 0;
}
2115
/*
 * get_mem_cached - extract the "total_cache" value from memory.stat text.
 *
 * Scans @memstat line by line for a line starting with "total_cache",
 * parses the number that follows and stores it, divided by 1024, in *@v.
 * *@v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	static const char key[] = "total_cache";
	const size_t keylen = sizeof(key) - 1;
	char *cur = memstat;

	*v = 0;
	while (*cur != '\0') {
		if (strncmp(cur, key, keylen) == 0) {
			sscanf(cur + keylen, "%lu", v);
			*v /= 1024;
			return;
		}

		/* advance to the start of the next line, if any */
		cur = strchr(cur, '\n');
		if (cur == NULL)
			return;
		cur++;
	}
}
2133
/*
 * get_blkio_io_value - look up one counter in blkio statistics text.
 *
 * Searches @str line by line for a line starting with
 * "<major>:<minor> <iotype>" and stores the number following that prefix
 * in *@v.  *@v is 0 when no line matches.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32] = { 0 };
	size_t keylen;
	char *cur;

	snprintf(key, 32, "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;
	for (cur = str; *cur != '\0'; cur++) {
		if (strncmp(cur, key, keylen) == 0) {
			sscanf(cur + keylen, "%lu", v);
			return;
		}

		/* jump to the newline; the loop increment steps past it */
		cur = strchr(cur, '\n');
		if (cur == NULL)
			return;
	}
}
2156
53b43826
SH
2157static int read_file(const char *path, char *buf, size_t size,
2158 struct file_info *d)
2159{
2160 size_t linelen = 0, total_len = 0, rv = 0;
2161 char *line = NULL;
2162 char *cache = d->buf;
2163 size_t cache_size = d->buflen;
2164 FILE *f = fopen(path, "r");
2165 if (!f)
2166 return 0;
2167
2168 while (getline(&line, &linelen, f) != -1) {
2169 size_t l = snprintf(cache, cache_size, "%s", line);
2170 if (l < 0) {
2171 perror("Error writing to cache");
2172 rv = 0;
2173 goto err;
2174 }
2175 if (l >= cache_size) {
2176 fprintf(stderr, "Internal error: truncated write to cache\n");
2177 rv = 0;
2178 goto err;
2179 }
2180 if (l < cache_size) {
2181 cache += l;
2182 cache_size -= l;
2183 total_len += l;
2184 } else {
2185 cache += cache_size;
2186 total_len += cache_size;
2187 cache_size = 0;
2188 break;
2189 }
2190 }
2191
2192 d->size = total_len;
2193 if (total_len > size ) total_len = size;
2194
2195 /* read from off 0 */
2196 memcpy(buf, d->buf, total_len);
2197 rv = total_len;
2198 err:
2199 fclose(f);
2200 free(line);
2201 return rv;
2202}
2203
758ad80c 2204/*
2ad6d2bd 2205 * FUSE ops for /proc
758ad80c 2206 */
758ad80c 2207
7bc95a75
SH
/*
 * get_memlimit - read memory.limit_in_bytes of @cgroup in the "memory"
 * controller.
 *
 * Returns the parsed value, or (unsigned long)-1 if it cannot be read.
 */
static unsigned long get_memlimit(const char *cgroup)
{
	unsigned long limit = -1;
	char *value = NULL;
	bool found = cgfs_get_value("memory", cgroup, "memory.limit_in_bytes", &value);

	if (found)
		limit = strtoul(value, NULL, 10);

	free(value);
	return limit;
}
2220
/*
 * get_min_memlimit - smallest memory limit that applies to @cgroup.
 *
 * Starts from @cgroup's own limit and walks up through every parent
 * directory until "/", keeping the lowest limit that could be read.
 */
static unsigned long get_min_memlimit(const char *cgroup)
{
	char *cur = strdupa(cgroup);
	unsigned long lowest = get_memlimit(cur);

	while (strcmp(cur, "/") != 0) {
		unsigned long limit;

		/* dirname() works on the stack copy made above */
		cur = dirname(cur);
		limit = get_memlimit(cur);
		if (limit != -1 && limit < lowest)
			lowest = limit;
	}

	return lowest;
}
2237
23ce2127
SH
2238static int proc_meminfo_read(char *buf, size_t size, off_t offset,
2239 struct fuse_file_info *fi)
2240{
2dc17609 2241 struct fuse_context *fc = fuse_get_context();
97f1f27b 2242 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd 2243 char *cg;
4622ad78 2244 char *memusage_str = NULL, *memstat_str = NULL,
b731895e
NW
2245 *memswlimit_str = NULL, *memswusage_str = NULL,
2246 *memswlimit_default_str = NULL, *memswusage_default_str = NULL;
4622ad78
TG
2247 unsigned long memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0,
2248 cached = 0, hosttotal = 0;
2dc17609 2249 char *line = NULL;
e1068397 2250 size_t linelen = 0, total_len = 0, rv = 0;
97f1f27b
YY
2251 char *cache = d->buf;
2252 size_t cache_size = d->buflen;
2c51f8dd 2253 FILE *f = NULL;
2dc17609 2254
97f1f27b
YY
2255 if (offset){
2256 if (offset > d->size)
2257 return -EINVAL;
b5ad2d21
SH
2258 if (!d->cached)
2259 return 0;
97f1f27b
YY
2260 int left = d->size - offset;
2261 total_len = left > size ? size: left;
2262 memcpy(buf, cache + offset, total_len);
2263 return total_len;
2264 }
2dc17609 2265
b11c6ec0 2266 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2267 if (initpid <= 0)
2268 initpid = fc->pid;
0dcc31ea 2269 cg = get_pid_cgroup(initpid, "memory");
2dc17609 2270 if (!cg)
53b43826 2271 return read_file("/proc/meminfo", buf, size, d);
2dc17609 2272
7bc95a75 2273 memlimit = get_min_memlimit(cg);
35482f91 2274 if (!cgfs_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
2c51f8dd 2275 goto err;
35482f91 2276 if (!cgfs_get_value("memory", cg, "memory.stat", &memstat_str))
2c51f8dd 2277 goto err;
4622ad78
TG
2278
2279 // Following values are allowed to fail, because swapaccount might be turned
2280 // off for current kernel
2281 if(cgfs_get_value("memory", cg, "memory.memsw.limit_in_bytes", &memswlimit_str) &&
2282 cgfs_get_value("memory", cg, "memory.memsw.usage_in_bytes", &memswusage_str))
2283 {
b731895e
NW
2284 /* If swapaccounting is turned on, then default value is assumed to be that of cgroup / */
2285 if (!cgfs_get_value("memory", "/", "memory.memsw.limit_in_bytes", &memswlimit_default_str))
2286 goto err;
2287 if (!cgfs_get_value("memory", "/", "memory.memsw.usage_in_bytes", &memswusage_default_str))
2288 goto err;
2289
4622ad78
TG
2290 memswlimit = strtoul(memswlimit_str, NULL, 10);
2291 memswusage = strtoul(memswusage_str, NULL, 10);
b731895e
NW
2292
2293 if (!strcmp(memswlimit_str, memswlimit_default_str))
a2de34ba 2294 memswlimit = 0;
b731895e 2295 if (!strcmp(memswusage_str, memswusage_default_str))
a2de34ba
SH
2296 memswusage = 0;
2297
b731895e
NW
2298 memswlimit = memswlimit / 1024;
2299 memswusage = memswusage / 1024;
4622ad78 2300 }
b731895e
NW
2301
2302 memusage = strtoul(memusage_str, NULL, 10);
2303 memlimit /= 1024;
2304 memusage /= 1024;
2305
2dc17609
SH
2306 get_mem_cached(memstat_str, &cached);
2307
2308 f = fopen("/proc/meminfo", "r");
2309 if (!f)
2c51f8dd 2310 goto err;
2dc17609
SH
2311
2312 while (getline(&line, &linelen, f) != -1) {
2313 size_t l;
2314 char *printme, lbuf[100];
2315
2316 memset(lbuf, 0, 100);
2317 if (startswith(line, "MemTotal:")) {
2318 sscanf(line+14, "%lu", &hosttotal);
2319 if (hosttotal < memlimit)
2320 memlimit = hosttotal;
2321 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
2322 printme = lbuf;
2323 } else if (startswith(line, "MemFree:")) {
2324 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
2325 printme = lbuf;
2326 } else if (startswith(line, "MemAvailable:")) {
2327 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
2328 printme = lbuf;
4622ad78
TG
2329 } else if (startswith(line, "SwapTotal:") && memswlimit > 0) {
2330 snprintf(lbuf, 100, "SwapTotal: %8lu kB\n", memswlimit - memlimit);
2331 printme = lbuf;
2332 } else if (startswith(line, "SwapFree:") && memswlimit > 0 && memswusage > 0) {
2333 snprintf(lbuf, 100, "SwapFree: %8lu kB\n",
2334 (memswlimit - memlimit) - (memswusage - memusage));
2335 printme = lbuf;
2dc17609
SH
2336 } else if (startswith(line, "Buffers:")) {
2337 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
2338 printme = lbuf;
2339 } else if (startswith(line, "Cached:")) {
2340 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
2341 printme = lbuf;
2342 } else if (startswith(line, "SwapCached:")) {
2343 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
2344 printme = lbuf;
2345 } else
2346 printme = line;
97f1f27b
YY
2347
2348 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
2349 if (l < 0) {
2350 perror("Error writing to cache");
2351 rv = 0;
2352 goto err;
2353
2354 }
2355 if (l >= cache_size) {
2356 fprintf(stderr, "Internal error: truncated write to cache\n");
2357 rv = 0;
2358 goto err;
2359 }
2360
97f1f27b
YY
2361 cache += l;
2362 cache_size -= l;
2f919d9d 2363 total_len += l;
2dc17609
SH
2364 }
2365
b5ad2d21 2366 d->cached = 1;
97f1f27b
YY
2367 d->size = total_len;
2368 if (total_len > size ) total_len = size;
2369 memcpy(buf, d->buf, total_len);
2370
e1068397 2371 rv = total_len;
2c51f8dd
SH
2372err:
2373 if (f)
2374 fclose(f);
92c84dc4 2375 free(line);
2c51f8dd 2376 free(cg);
2c51f8dd 2377 free(memusage_str);
4622ad78
TG
2378 free(memswlimit_str);
2379 free(memswusage_str);
2c51f8dd 2380 free(memstat_str);
b731895e
NW
2381 free(memswlimit_default_str);
2382 free(memswusage_default_str);
e1068397 2383 return rv;
23ce2127
SH
2384}
2385
/*
 * Fetch the contents of cpuset.cpus for cgroup @cg.
 *
 * Returns the string produced by cgfs_get_value(), which the caller
 * must free, or NULL when the value could not be read.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgfs_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
2398
fa47bb52 2399bool cpu_in_cpuset(int cpu, const char *cpuset);
23ce2127 2400
aeb56147
SH
/*
 * Parse the cpu number out of a "processor : N" line and report whether
 * that cpu is a member of @cpuset.  Lines that do not parse yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu_nr;

	return sscanf(line, "processor : %d", &cpu_nr) == 1 &&
	       cpu_in_cpuset(cpu_nr, cpuset);
}
2409
23ce2127
SH
/*
 * Return true when @line is a "processor : N" line as found in
 * /proc/cpuinfo, i.e. when a cpu number can be scanned from it.
 */
static bool is_processor_line(const char *line)
{
	int ignored;

	return sscanf(line, "processor : %d", &ignored) == 1;
}
2421
23ce2127
SH
2422static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
2423 struct fuse_file_info *fi)
2424{
2425 struct fuse_context *fc = fuse_get_context();
97f1f27b 2426 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2427 char *cg;
2428 char *cpuset = NULL;
23ce2127 2429 char *line = NULL;
e1068397 2430 size_t linelen = 0, total_len = 0, rv = 0;
23ce2127
SH
2431 bool am_printing = false;
2432 int curcpu = -1;
97f1f27b
YY
2433 char *cache = d->buf;
2434 size_t cache_size = d->buflen;
2c51f8dd 2435 FILE *f = NULL;
23ce2127 2436
97f1f27b
YY
2437 if (offset){
2438 if (offset > d->size)
2439 return -EINVAL;
b5ad2d21
SH
2440 if (!d->cached)
2441 return 0;
97f1f27b
YY
2442 int left = d->size - offset;
2443 total_len = left > size ? size: left;
2444 memcpy(buf, cache + offset, total_len);
2f919d9d 2445 return total_len;
97f1f27b 2446 }
23ce2127 2447
b11c6ec0 2448 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2449 if (initpid <= 0)
2450 initpid = fc->pid;
0dcc31ea 2451 cg = get_pid_cgroup(initpid, "cpuset");
23ce2127 2452 if (!cg)
53b43826 2453 return read_file("proc/cpuinfo", buf, size, d);
23ce2127
SH
2454
2455 cpuset = get_cpuset(cg);
2456 if (!cpuset)
2c51f8dd 2457 goto err;
23ce2127
SH
2458
2459 f = fopen("/proc/cpuinfo", "r");
2460 if (!f)
2c51f8dd 2461 goto err;
23ce2127
SH
2462
2463 while (getline(&line, &linelen, f) != -1) {
2464 size_t l;
2465 if (is_processor_line(line)) {
aeb56147 2466 am_printing = cpuline_in_cpuset(line, cpuset);
23ce2127
SH
2467 if (am_printing) {
2468 curcpu ++;
97f1f27b 2469 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
e1068397
MM
2470 if (l < 0) {
2471 perror("Error writing to cache");
2472 rv = 0;
2473 goto err;
2474 }
2475 if (l >= cache_size) {
2476 fprintf(stderr, "Internal error: truncated write to cache\n");
2477 rv = 0;
2478 goto err;
2479 }
97f1f27b
YY
2480 if (l < cache_size){
2481 cache += l;
2482 cache_size -= l;
2483 total_len += l;
2484 }else{
2485 cache += cache_size;
2486 total_len += cache_size;
2487 cache_size = 0;
2488 break;
2489 }
23ce2127
SH
2490 }
2491 continue;
2492 }
2493 if (am_printing) {
97f1f27b 2494 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2495 if (l < 0) {
2496 perror("Error writing to cache");
2497 rv = 0;
2498 goto err;
2499 }
2500 if (l >= cache_size) {
2501 fprintf(stderr, "Internal error: truncated write to cache\n");
2502 rv = 0;
2503 goto err;
2504 }
97f1f27b
YY
2505 if (l < cache_size) {
2506 cache += l;
2507 cache_size -= l;
2508 total_len += l;
2509 } else {
2510 cache += cache_size;
2511 total_len += cache_size;
2512 cache_size = 0;
2513 break;
2514 }
23ce2127
SH
2515 }
2516 }
2517
b5ad2d21 2518 d->cached = 1;
97f1f27b
YY
2519 d->size = total_len;
2520 if (total_len > size ) total_len = size;
2521
2522 /* read from off 0 */
2523 memcpy(buf, d->buf, total_len);
e1068397 2524 rv = total_len;
2c51f8dd
SH
2525err:
2526 if (f)
2527 fclose(f);
92c84dc4 2528 free(line);
2c51f8dd
SH
2529 free(cpuset);
2530 free(cg);
e1068397 2531 return rv;
23ce2127
SH
2532}
2533
2534static int proc_stat_read(char *buf, size_t size, off_t offset,
2535 struct fuse_file_info *fi)
2536{
aeb56147 2537 struct fuse_context *fc = fuse_get_context();
97f1f27b 2538 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2539 char *cg;
2540 char *cpuset = NULL;
aeb56147 2541 char *line = NULL;
e1068397 2542 size_t linelen = 0, total_len = 0, rv = 0;
2a0fde62 2543 int curcpu = -1; /* cpu numbering starts at 0 */
97f1f27b
YY
2544 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
2545 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
2546 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
2547#define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2548 char cpuall[CPUALL_MAX_SIZE];
2549 /* reserve for cpu all */
2550 char *cache = d->buf + CPUALL_MAX_SIZE;
2551 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
2c51f8dd 2552 FILE *f = NULL;
aeb56147 2553
97f1f27b
YY
2554 if (offset){
2555 if (offset > d->size)
2556 return -EINVAL;
b5ad2d21
SH
2557 if (!d->cached)
2558 return 0;
97f1f27b
YY
2559 int left = d->size - offset;
2560 total_len = left > size ? size: left;
2561 memcpy(buf, d->buf + offset, total_len);
2f919d9d 2562 return total_len;
97f1f27b 2563 }
aeb56147 2564
b11c6ec0 2565 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2566 if (initpid <= 0)
2567 initpid = fc->pid;
0dcc31ea 2568 cg = get_pid_cgroup(initpid, "cpuset");
aeb56147 2569 if (!cg)
53b43826 2570 return read_file("/proc/stat", buf, size, d);
aeb56147
SH
2571
2572 cpuset = get_cpuset(cg);
2573 if (!cpuset)
2c51f8dd 2574 goto err;
aeb56147
SH
2575
2576 f = fopen("/proc/stat", "r");
2577 if (!f)
2c51f8dd 2578 goto err;
aeb56147 2579
97f1f27b
YY
2580 //skip first line
2581 if (getline(&line, &linelen, f) < 0) {
2582 fprintf(stderr, "proc_stat_read read first line failed\n");
2c51f8dd 2583 goto err;
97f1f27b
YY
2584 }
2585
aeb56147
SH
2586 while (getline(&line, &linelen, f) != -1) {
2587 size_t l;
2588 int cpu;
2a0fde62 2589 char cpu_char[10]; /* That's a lot of cores */
aeb56147
SH
2590 char *c;
2591
2a0fde62
CB
2592 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
2593 /* not a ^cpuN line containing a number N, just print it */
97f1f27b 2594 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2595 if (l < 0) {
2596 perror("Error writing to cache");
2597 rv = 0;
2598 goto err;
2599 }
2600 if (l >= cache_size) {
2601 fprintf(stderr, "Internal error: truncated write to cache\n");
2602 rv = 0;
2603 goto err;
2604 }
2605 if (l < cache_size) {
97f1f27b
YY
2606 cache += l;
2607 cache_size -= l;
2608 total_len += l;
2609 continue;
e1068397 2610 } else {
97f1f27b
YY
2611 //no more space, break it
2612 cache += cache_size;
2613 total_len += cache_size;
2614 cache_size = 0;
2615 break;
2616 }
aeb56147 2617 }
2a0fde62
CB
2618
2619 if (sscanf(cpu_char, "%d", &cpu) != 1)
2620 continue;
aeb56147
SH
2621 if (!cpu_in_cpuset(cpu, cpuset))
2622 continue;
2623 curcpu ++;
2624
2625 c = strchr(line, ' ');
2626 if (!c)
2627 continue;
25c5e8fb 2628 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
e1068397
MM
2629 if (l < 0) {
2630 perror("Error writing to cache");
2631 rv = 0;
2632 goto err;
2633
2634 }
2635 if (l >= cache_size) {
2636 fprintf(stderr, "Internal error: truncated write to cache\n");
2637 rv = 0;
2638 goto err;
2639 }
2640
97f1f27b
YY
2641 cache += l;
2642 cache_size -= l;
aeb56147 2643 total_len += l;
2f919d9d 2644
97f1f27b
YY
2645 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
2646 &softirq, &steal, &guest) != 9)
2647 continue;
2648 user_sum += user;
2649 nice_sum += nice;
2650 system_sum += system;
2651 idle_sum += idle;
2652 iowait_sum += iowait;
2653 irq_sum += irq;
2654 softirq_sum += softirq;
2655 steal_sum += steal;
2f919d9d 2656 guest_sum += guest;
97f1f27b
YY
2657 }
2658
2659 cache = d->buf;
2660
2f919d9d 2661 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
97f1f27b
YY
2662 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
2663 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
2664 memcpy(cache, cpuall, cpuall_len);
2f919d9d 2665 cache += cpuall_len;
2c51f8dd 2666 } else{
97f1f27b
YY
2667 /* shouldn't happen */
2668 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
2669 cpuall_len = 0;
2670 }
2671
2672 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
2673 total_len += cpuall_len;
b5ad2d21 2674 d->cached = 1;
97f1f27b
YY
2675 d->size = total_len;
2676 if (total_len > size ) total_len = size;
2677
2678 memcpy(buf, d->buf, total_len);
e1068397 2679 rv = total_len;
2c51f8dd
SH
2680
2681err:
2682 if (f)
2683 fclose(f);
92c84dc4 2684 free(line);
2c51f8dd
SH
2685 free(cpuset);
2686 free(cg);
e1068397 2687 return rv;
23ce2127
SH
2688}
2689
/*
 * Return the number of seconds between the st_ctime of /proc/<initpid>
 * and now, where <initpid> is what lookup_initpid_in_store() reports for
 * @pid.  Returns 0 when that pid cannot be determined, the path does not
 * fit, or lstat() fails.
 */
static long int getreaperage(pid_t pid)
{
	char procdir[100];
	struct stat st;
	pid_t reaper = lookup_initpid_in_store(pid);
	int n;

	if (reaper <= 0)
		return 0;

	n = snprintf(procdir, 100, "/proc/%d", reaper);
	if (n < 0 || n >= 100 || lstat(procdir, &st) < 0)
		return 0;

	return time(NULL) - st.st_ctime;
}
2710
0b6af11b
SH
/*
 * Read cpuacct.usage from the cpuacct cgroup of the init pid that
 * lookup_initpid_in_store() reports for @task, and return it divided by
 * 1000000000 (presumably nanoseconds to seconds -- confirm against the
 * cpuacct controller documentation).  Returns 0 when the init pid, the
 * cgroup or the value is unavailable.
 */
static unsigned long get_reaper_busy(pid_t task)
{
	pid_t reaper = lookup_initpid_in_store(task);
	char *cg = NULL, *raw = NULL;
	unsigned long busy = 0;

	if (reaper <= 0)
		return 0;

	cg = get_pid_cgroup(reaper, "cpuacct");
	if (cg && cgfs_get_value("cpuacct", cg, "cpuacct.usage", &raw)) {
		busy = strtoul(raw, NULL, 10);
		busy /= 1000000000;
	}

	free(cg);
	free(raw);
	return busy;
}
2733
2734/*
2735 * We read /proc/uptime and reuse its second field.
2736 * For the first field, we use the mtime for the reaper for
2737 * the calling pid as returned by getreaperage
2738 */
23ce2127
SH
2739static int proc_uptime_read(char *buf, size_t size, off_t offset,
2740 struct fuse_file_info *fi)
2741{
41bb9357 2742 struct fuse_context *fc = fuse_get_context();
97f1f27b 2743 struct file_info *d = (struct file_info *)fi->fh;
ff96a5f9 2744 long int reaperage = getreaperage(fc->pid);
0b6af11b 2745 unsigned long int busytime = get_reaper_busy(fc->pid), idletime;
b5ad2d21 2746 char *cache = d->buf;
97f1f27b 2747 size_t total_len = 0;
41bb9357 2748
97f1f27b
YY
2749 if (offset){
2750 if (offset > d->size)
2751 return -EINVAL;
b5ad2d21
SH
2752 if (!d->cached)
2753 return 0;
2754 int left = d->size - offset;
2755 total_len = left > size ? size: left;
2756 memcpy(buf, cache + offset, total_len);
2757 return total_len;
97f1f27b
YY
2758 }
2759
0b6af11b 2760 idletime = reaperage - busytime;
f6c0b279
SH
2761 if (idletime > reaperage)
2762 idletime = reaperage;
2763
b5ad2d21 2764 total_len = snprintf(d->buf, d->size, "%ld.0 %lu.0\n", reaperage, idletime);
e1068397
MM
2765 if (total_len < 0){
2766 perror("Error writing to cache");
2767 return 0;
2768 }
cdcdb29b 2769
b5ad2d21
SH
2770 d->size = (int)total_len;
2771 d->cached = 1;
2772
2773 if (total_len > size) total_len = size;
2774
2775 memcpy(buf, d->buf, total_len);
97f1f27b 2776 return total_len;
23ce2127
SH
2777}
2778
49878439
YY
2779static int proc_diskstats_read(char *buf, size_t size, off_t offset,
2780 struct fuse_file_info *fi)
2781{
2782 char dev_name[72];
2783 struct fuse_context *fc = fuse_get_context();
97f1f27b 2784 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2785 char *cg;
2786 char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
49878439
YY
2787 *io_wait_time_str = NULL, *io_service_time_str = NULL;
2788 unsigned long read = 0, write = 0;
2789 unsigned long read_merged = 0, write_merged = 0;
2790 unsigned long read_sectors = 0, write_sectors = 0;
2791 unsigned long read_ticks = 0, write_ticks = 0;
2792 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
2793 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
b5ad2d21
SH
2794 char *cache = d->buf;
2795 size_t cache_size = d->buflen;
49878439 2796 char *line = NULL;
e1068397 2797 size_t linelen = 0, total_len = 0, rv = 0;
49878439
YY
2798 unsigned int major = 0, minor = 0;
2799 int i = 0;
2c51f8dd 2800 FILE *f = NULL;
49878439 2801
97f1f27b
YY
2802 if (offset){
2803 if (offset > d->size)
2804 return -EINVAL;
b5ad2d21
SH
2805 if (!d->cached)
2806 return 0;
2807 int left = d->size - offset;
2808 total_len = left > size ? size: left;
2809 memcpy(buf, cache + offset, total_len);
2810 return total_len;
97f1f27b 2811 }
49878439 2812
b11c6ec0 2813 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2814 if (initpid <= 0)
2815 initpid = fc->pid;
0dcc31ea 2816 cg = get_pid_cgroup(initpid, "blkio");
49878439 2817 if (!cg)
53b43826 2818 return read_file("/proc/diskstats", buf, size, d);
49878439 2819
35482f91 2820 if (!cgfs_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
2c51f8dd 2821 goto err;
35482f91 2822 if (!cgfs_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
2c51f8dd 2823 goto err;
35482f91 2824 if (!cgfs_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
2c51f8dd 2825 goto err;
35482f91 2826 if (!cgfs_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
2c51f8dd 2827 goto err;
35482f91 2828 if (!cgfs_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
2c51f8dd 2829 goto err;
49878439
YY
2830
2831
2832 f = fopen("/proc/diskstats", "r");
2833 if (!f)
2c51f8dd 2834 goto err;
49878439
YY
2835
2836 while (getline(&line, &linelen, f) != -1) {
2837 size_t l;
2838 char *printme, lbuf[256];
2839
c0adec85 2840 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
49878439
YY
2841 if(i == 3){
2842 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
2843 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
2844 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
2845 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
2846 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
2847 read_sectors = read_sectors/512;
2848 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
2849 write_sectors = write_sectors/512;
2f919d9d 2850
49878439
YY
2851 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
2852 rd_svctm = rd_svctm/1000000;
2853 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
2854 rd_wait = rd_wait/1000000;
2855 read_ticks = rd_svctm + rd_wait;
2856
2857 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
2858 wr_svctm = wr_svctm/1000000;
2859 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
2860 wr_wait = wr_wait/1000000;
2861 write_ticks = wr_svctm + wr_wait;
2862
2863 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
2864 tot_ticks = tot_ticks/1000000;
2865 }else{
2866 continue;
2867 }
2868
2869 memset(lbuf, 0, 256);
2870 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
2f919d9d 2871 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
49878439
YY
2872 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
2873 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
2874 printme = lbuf;
2875 } else
2876 continue;
2877
b5ad2d21 2878 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
2879 if (l < 0) {
2880 perror("Error writing to fuse buf");
2881 rv = 0;
2882 goto err;
2883 }
b5ad2d21 2884 if (l >= cache_size) {
e1068397
MM
2885 fprintf(stderr, "Internal error: truncated write to cache\n");
2886 rv = 0;
2887 goto err;
2888 }
b5ad2d21
SH
2889 cache += l;
2890 cache_size -= l;
49878439
YY
2891 total_len += l;
2892 }
2893
b5ad2d21 2894 d->cached = 1;
97f1f27b 2895 d->size = total_len;
b5ad2d21
SH
2896 if (total_len > size ) total_len = size;
2897 memcpy(buf, d->buf, total_len);
2898
e1068397 2899 rv = total_len;
2c51f8dd
SH
2900err:
2901 free(cg);
2902 if (f)
2903 fclose(f);
49878439 2904 free(line);
2c51f8dd
SH
2905 free(io_serviced_str);
2906 free(io_merged_str);
2907 free(io_service_bytes_str);
2908 free(io_wait_time_str);
2909 free(io_service_time_str);
e1068397 2910 return rv;
49878439
YY
2911}
2912
23ce2127
SH
/*
 * Return the total number of bytes getline() yields when reading file
 * @which line by line, or 0 when the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char *linebuf = NULL;
	size_t cap = 0;
	ssize_t total = 0;
	FILE *fp = fopen(which, "r");

	if (!fp)
		return 0;

	for (;;) {
		ssize_t got = getline(&linebuf, &cap, fp);

		if (got == -1)
			break;
		total += got;
	}

	fclose(fp);
	free(linebuf);
	return total;
}
2929
758ad80c
SH
/*
 * Fill @sb for a path below /proc.
 *
 * "/proc" is reported as a directory (mode 0555, nlink 2); the five
 * virtualized files are reported as regular files (mode 0444, nlink 1,
 * size 0).  All timestamps are set to the current time and uid/gid to 0.
 *
 * Returns 0 on success, -EINVAL when the clock cannot be read and
 * -ENOENT for any other path.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const proc_files[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	struct timespec ts;
	size_t idx;

	memset(sb, 0, sizeof(struct stat));
	if (clock_gettime(CLOCK_REALTIME, &ts) < 0)
		return -EINVAL;

	sb->st_uid = 0;
	sb->st_gid = 0;
	sb->st_atim = ts;
	sb->st_mtim = ts;
	sb->st_ctim = ts;

	if (strcmp(path, "/proc") == 0) {
		sb->st_mode = S_IFDIR | 00555;
		sb->st_nlink = 2;
		return 0;
	}

	for (idx = 0; idx < sizeof(proc_files) / sizeof(proc_files[0]); idx++) {
		if (strcmp(path, proc_files[idx]) != 0)
			continue;
		sb->st_size = 0;
		sb->st_mode = S_IFREG | 00444;
		sb->st_nlink = 1;
		return 0;
	}

	return -ENOENT;
}
2957
2958static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2959 struct fuse_file_info *fi)
2960{
2961 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2962 filler(buf, "meminfo", NULL, 0) != 0 ||
2963 filler(buf, "stat", NULL, 0) != 0 ||
49878439
YY
2964 filler(buf, "uptime", NULL, 0) != 0 ||
2965 filler(buf, "diskstats", NULL, 0) != 0)
758ad80c 2966 return -EINVAL;
758ad80c
SH
2967 return 0;
2968}
2969
35629743
SH
2970static int proc_open(const char *path, struct fuse_file_info *fi)
2971{
96fc5ee6
SH
2972 int type = -1;
2973 struct file_info *info;
2974
2975 if (strcmp(path, "/proc/meminfo") == 0)
2976 type = LXC_TYPE_PROC_MEMINFO;
2977 else if (strcmp(path, "/proc/cpuinfo") == 0)
2978 type = LXC_TYPE_PROC_CPUINFO;
2979 else if (strcmp(path, "/proc/uptime") == 0)
2980 type = LXC_TYPE_PROC_UPTIME;
2981 else if (strcmp(path, "/proc/stat") == 0)
2982 type = LXC_TYPE_PROC_STAT;
2983 else if (strcmp(path, "/proc/diskstats") == 0)
2984 type = LXC_TYPE_PROC_DISKSTATS;
2985 if (type == -1)
2986 return -ENOENT;
2987
2c51f8dd
SH
2988 info = malloc(sizeof(*info));
2989 if (!info)
2990 return -ENOMEM;
2991
96fc5ee6
SH
2992 memset(info, 0, sizeof(*info));
2993 info->type = type;
2994
97f1f27b 2995 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
2c51f8dd
SH
2996 do {
2997 info->buf = malloc(info->buflen);
2998 } while (!info->buf);
97f1f27b
YY
2999 memset(info->buf, 0, info->buflen);
3000 /* set actual size to buffer size */
2f919d9d 3001 info->size = info->buflen;
97f1f27b 3002
96fc5ee6
SH
3003 fi->fh = (unsigned long)info;
3004 return 0;
3005}
3006
3007static int proc_release(const char *path, struct fuse_file_info *fi)
3008{
3009 struct file_info *f = (struct file_info *)fi->fh;
3010
3011 do_release_file_info(f);
3012 return 0;
35629743
SH
3013}
3014
35629743
SH
3015static int proc_read(const char *path, char *buf, size_t size, off_t offset,
3016 struct fuse_file_info *fi)
3017{
96fc5ee6
SH
3018 struct file_info *f = (struct file_info *) fi->fh;
3019
3020 switch (f->type) {
2f919d9d 3021 case LXC_TYPE_PROC_MEMINFO:
23ce2127 3022 return proc_meminfo_read(buf, size, offset, fi);
96fc5ee6 3023 case LXC_TYPE_PROC_CPUINFO:
23ce2127 3024 return proc_cpuinfo_read(buf, size, offset, fi);
96fc5ee6 3025 case LXC_TYPE_PROC_UPTIME:
23ce2127 3026 return proc_uptime_read(buf, size, offset, fi);
96fc5ee6 3027 case LXC_TYPE_PROC_STAT:
23ce2127 3028 return proc_stat_read(buf, size, offset, fi);
96fc5ee6 3029 case LXC_TYPE_PROC_DISKSTATS:
49878439 3030 return proc_diskstats_read(buf, size, offset, fi);
96fc5ee6
SH
3031 default:
3032 return -EINVAL;
3033 }
35629743
SH
3034}
3035
2ad6d2bd
SH
3036/*
3037 * FUSE ops for /
3038 * these just delegate to the /proc and /cgroup ops as
3039 * needed
3040 */
758ad80c
SH
3041
3042static int lxcfs_getattr(const char *path, struct stat *sb)
3043{
3044 if (strcmp(path, "/") == 0) {
3045 sb->st_mode = S_IFDIR | 00755;
3046 sb->st_nlink = 2;
3047 return 0;
3048 }
3049 if (strncmp(path, "/cgroup", 7) == 0) {
3050 return cg_getattr(path, sb);
3051 }
35629743 3052 if (strncmp(path, "/proc", 5) == 0) {
758ad80c
SH
3053 return proc_getattr(path, sb);
3054 }
3055 return -EINVAL;
3056}
3057
/*
 * opendir handler: "/" and "/proc" need no state and succeed directly,
 * paths starting with "/cgroup" are delegated to cg_opendir(), anything
 * else yields -ENOENT.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	if (strcmp(path, "/") == 0 || strcmp(path, "/proc") == 0)
		return 0;

	if (strncmp(path, "/cgroup", 7) != 0)
		return -ENOENT;

	return cg_opendir(path, fi);
}
3070
3071static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
3072 struct fuse_file_info *fi)
3073{
3074 if (strcmp(path, "/") == 0) {
3075 if (filler(buf, "proc", NULL, 0) != 0 ||
3076 filler(buf, "cgroup", NULL, 0) != 0)
3077 return -EINVAL;
3078 return 0;
3079 }
35629743 3080 if (strncmp(path, "/cgroup", 7) == 0)
758ad80c 3081 return cg_readdir(path, buf, filler, offset, fi);
35629743
SH
3082 if (strcmp(path, "/proc") == 0)
3083 return proc_readdir(path, buf, filler, offset, fi);
758ad80c
SH
3084 return -EINVAL;
3085}
3086
/*
 * releasedir handler: nothing to release for "/" and "/proc"; paths
 * starting with "/cgroup" are delegated to cg_releasedir(); anything
 * else yields -EINVAL.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	if (strcmp(path, "/") == 0)
		return 0;

	if (strncmp(path, "/cgroup", 7) == 0)
		return cg_releasedir(path, fi);

	return strcmp(path, "/proc") == 0 ? 0 : -EINVAL;
}
3098
99978832
SH
/*
 * open handler: delegates to cg_open() for paths starting with
 * "/cgroup" and to proc_open() for paths starting with "/proc";
 * returns -EINVAL otherwise.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	bool is_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (is_cgroup)
		return cg_open(path, fi);

	if (strncmp(path, "/proc", 5) != 0)
		return -EINVAL;

	return proc_open(path, fi);
}
3108
/*
 * read handler: delegates to cg_read() for paths starting with
 * "/cgroup" and to proc_read() for paths starting with "/proc";
 * returns -EINVAL otherwise.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	bool is_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (is_cgroup)
		return cg_read(path, buf, size, offset, fi);

	if (strncmp(path, "/proc", 5) != 0)
		return -EINVAL;

	return proc_read(path, buf, size, offset, fi);
}
3119
2ad6d2bd
SH
/*
 * write handler: only paths starting with "/cgroup" are writable and
 * are delegated to cg_write(); everything else yields -EINVAL.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
3129
99978832
SH
/* flush handler: nothing to do, always reports success. */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
3134
/*
 * release handler: delegates to cg_release() for paths starting with
 * "/cgroup" and to proc_release() for paths starting with "/proc";
 * returns -EINVAL otherwise.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	bool is_cgroup = strncmp(path, "/cgroup", 7) == 0;

	if (is_cgroup)
		return cg_release(path, fi);

	if (strncmp(path, "/proc", 5) != 0)
		return -EINVAL;

	return proc_release(path, fi);
}
3144
/* fsync handler: nothing to do, always reports success. */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
3149
ab54b798
SH
/*
 * mkdir handler: only supported below "/cgroup", where it is delegated
 * to cg_mkdir(); everything else yields -EINVAL.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
3157
341b21ad
SH
/*
 * chown handler: only supported below "/cgroup", where it is delegated
 * to cg_chown(); everything else yields -EINVAL.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
3165
2ad6d2bd
SH
/*
 * truncate handler.  cat truncates a file before calling ops->write,
 * which is meaningless for cgroup files, so for paths starting with
 * "/cgroup" this reports success without doing anything.  All other
 * paths yield -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	(void)newsize;

	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
3177
50d8d5b5
SH
/*
 * rmdir handler: only supported below "/cgroup", where it is delegated
 * to cg_rmdir(); everything else yields -EINVAL.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
3184
fd2e4e03
SH
/*
 * chmod handler: only supported below "/cgroup", where it is delegated
 * to cg_chmod(); everything else yields -EINVAL.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
3191
758ad80c
SH
3192const struct fuse_operations lxcfs_ops = {
3193 .getattr = lxcfs_getattr,
3194 .readlink = NULL,
3195 .getdir = NULL,
3196 .mknod = NULL,
ab54b798 3197 .mkdir = lxcfs_mkdir,
758ad80c 3198 .unlink = NULL,
50d8d5b5 3199 .rmdir = lxcfs_rmdir,
758ad80c
SH
3200 .symlink = NULL,
3201 .rename = NULL,
3202 .link = NULL,
fd2e4e03 3203 .chmod = lxcfs_chmod,
341b21ad 3204 .chown = lxcfs_chown,
2ad6d2bd 3205 .truncate = lxcfs_truncate,
758ad80c 3206 .utime = NULL,
99978832
SH
3207
3208 .open = lxcfs_open,
3209 .read = lxcfs_read,
3210 .release = lxcfs_release,
2ad6d2bd 3211 .write = lxcfs_write,
99978832 3212
758ad80c 3213 .statfs = NULL,
99978832
SH
3214 .flush = lxcfs_flush,
3215 .fsync = lxcfs_fsync,
758ad80c
SH
3216
3217 .setxattr = NULL,
3218 .getxattr = NULL,
3219 .listxattr = NULL,
3220 .removexattr = NULL,
3221
3222 .opendir = lxcfs_opendir,
3223 .readdir = lxcfs_readdir,
3224 .releasedir = lxcfs_releasedir,
3225
3226 .fsyncdir = NULL,
3227 .init = NULL,
3228 .destroy = NULL,
3229 .access = NULL,
3230 .create = NULL,
3231 .ftruncate = NULL,
3232 .fgetattr = NULL,
3233};
3234
/*
 * Print the command line synopsis for program name @me to stderr and
 * terminate the process with exit status 1.
 */
static void usage(const char *me)
{
	fprintf(stderr,
		"Usage:\n"
		"\n"
		"%s mountpoint\n"
		"%s -h\n",
		me, me);
	exit(1);
}
3243
/*
 * Return true when @w is one of the accepted spellings of the help
 * option: "-h", "--help", "-help" or "help".
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = {
		"-h", "--help", "-help", "help",
	};
	size_t k;

	for (k = 0; k < sizeof(spellings) / sizeof(spellings[0]); k++) {
		if (strcmp(w, spellings[k]) == 0)
			return true;
	}
	return false;
}
3253
0b0f73db
SH
/*
 * Remove the first occurrence of @which from @argv (searching from
 * argv[1]) by shifting the following entries, including the terminating
 * NULL, down by one, and decrement *@argcp.  Does nothing when @which is
 * not present.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	char **slot = argv + 1;

	while (*slot && strcmp(*slot, which) != 0)
		slot++;
	if (!*slot)
		return;

	for (; *slot; slot++)
		*slot = slot[1];
	(*argcp)--;
}
3268
/*
 * Remove the first "@opt @v" pair from @argv (searching from argv[1]) by
 * shifting the following entries, including the terminating NULL, down
 * by two, and subtract 2 from *@argcp.  Does nothing when @opt is not
 * present or is the last argument.
 *
 * When @opt is followed by a value other than @v, the offending value is
 * reported on stderr and the process exits with status 1.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	for (i = 1; argv[i]; i++) {
		if (!argv[i+1])
			continue;
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i+1], v) != 0) {
			/* report what was actually passed, not what we expected */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i+1]);
			exit(1);
		}
		for (; argv[i+1]; i++) {
			argv[i] = argv[i+2];
		}
		(*argcp) -= 2;
		return;
	}
}
3289
758ad80c
SH
3290int main(int argc, char *argv[])
3291{
c0adec85 3292 int ret = -1;
0b0f73db
SH
3293 /*
3294 * what we pass to fuse_main is:
3295 * argv[0] -s -f -o allow_other,directio argv[1] NULL
3296 */
2c51f8dd
SH
3297 int nargs = 5, cnt = 0;
3298 char *newargv[6];
758ad80c 3299
977ac879 3300#ifdef FORTRAVIS
df062bcb
SH
3301 /* for travis which runs on 12.04 */
3302 if (glib_check_version (2, 36, 0) != NULL)
3303 g_type_init ();
977ac879 3304#endif
df062bcb 3305
0b0f73db
SH
3306 /* accomodate older init scripts */
3307 swallow_arg(&argc, argv, "-s");
3308 swallow_arg(&argc, argv, "-f");
3309 swallow_option(&argc, argv, "-o", "allow_other");
3310
2e9c0b32
SH
3311 if (argc == 2 && strcmp(argv[1], "--version") == 0) {
3312 fprintf(stderr, "%s\n", VERSION);
3313 exit(0);
3314 }
0b0f73db 3315 if (argc != 2 || is_help(argv[1]))
758ad80c
SH
3316 usage(argv[0]);
3317
38a76a91 3318 newargv[cnt++] = argv[0];
38a76a91
SH
3319 newargv[cnt++] = "-f";
3320 newargv[cnt++] = "-o";
f466a31e 3321 newargv[cnt++] = "allow_other,direct_io,entry_timeout=0.5,attr_timeout=0.5";
38a76a91
SH
3322 newargv[cnt++] = argv[1];
3323 newargv[cnt++] = NULL;
758ad80c 3324
35482f91 3325 if (!cgfs_setup_controllers())
c0adec85 3326 goto out;
758ad80c 3327
35482f91 3328 ret = fuse_main(nargs, newargv, &lxcfs_ops, NULL);
758ad80c 3329
c0adec85 3330out:
758ad80c 3331 return ret;
2183082c 3332}