]> git.proxmox.com Git - mirror_lxcfs.git/blame - lxcfs.c
Remove unused chunks in caching code
[mirror_lxcfs.git] / lxcfs.c
CommitLineData
758ad80c
SH
1/* lxcfs
2 *
b11c6ec0 3 * Copyright © 2014-2016 Canonical, Inc
758ad80c
SH
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
f2799430 6 * See COPYING file for details.
758ad80c
SH
7 */
8
758ad80c
SH
9#define FUSE_USE_VERSION 26
10
2183082c 11#include <stdio.h>
758ad80c
SH
12#include <dirent.h>
13#include <fcntl.h>
14#include <fuse.h>
15#include <unistd.h>
16#include <errno.h>
17#include <stdbool.h>
18#include <time.h>
19#include <string.h>
20#include <stdlib.h>
21#include <libgen.h>
41bb9357 22#include <sched.h>
b11c6ec0 23#include <pthread.h>
41bb9357 24#include <linux/sched.h>
a05660a6 25#include <sys/socket.h>
41bb9357 26#include <sys/mount.h>
5b2dfd85 27#include <sys/epoll.h>
41bb9357 28#include <wait.h>
758ad80c 29
977ac879 30#ifdef FORTRAVIS
df062bcb
SH
31#define GLIB_DISABLE_DEPRECATION_WARNINGS
32#include <glib-object.h>
977ac879 33#endif
df062bcb 34
35482f91 35#include "cgfs.h"
2e9c0b32 36#include "config.h" // for VERSION
758ad80c 37
443d13f5
SH
/*
 * Discriminator stored in file_info.type.
 *
 * LXC_TYPE_CGDIR is set by cg_opendir() and LXC_TYPE_CGFILE by cg_open().
 * The LXC_TYPE_PROC_* values are not referenced in this part of the file;
 * presumably they tag the emulated /proc files - confirm against the
 * proc handlers.
 */
enum {
	LXC_TYPE_CGDIR,
	LXC_TYPE_CGFILE,
	LXC_TYPE_PROC_MEMINFO,
	LXC_TYPE_PROC_CPUINFO,
	LXC_TYPE_PROC_UPTIME,
	LXC_TYPE_PROC_STAT,
	LXC_TYPE_PROC_DISKSTATS,
};
47
c688e1b3
SH
/*
 * Per-open-handle state.  cg_opendir() and cg_open() allocate one of these
 * and stash it in fuse_file_info->fh; do_release_file_info() frees it
 * together with every string/buffer it points to.
 */
struct file_info {
	char *controller;	/* heap copy of the controller name, may be NULL */
	char *cgroup;		/* heap copy of the cgroup path, may be NULL */
	char *file;		/* heap copy of the key (file) name, NULL for dirs */
	int type;		/* one of the LXC_TYPE_* values */
	char *buf;		/* unused as of yet */
	int buflen;
	int size;		/* actual data size */
	int cached;
};

/* reserve buffer size, for cpuall in /proc/stat */
#define BUF_RESERVE_SIZE 256
61
/*
 * A table caching which pid is init for a pid namespace.
 *
 * When looking up which pid is init for $qpid, we
 *   1. stat /proc/$qpid/ns/pid;
 *   2. check whether its inode number is in our store.
 *      a. If not, fork a child in qpid's ns to send us ucred.pid = 1 and
 *         read back the initpid.  Cache the initpid and the creation time
 *         of /proc/$initpid in a new store entry.
 *      b. If so, verify that /proc/$initpid still matches what we saved.
 *         If it does not, drop the entry (the lookup then falls back to
 *         a.); otherwise return the cached initpid.
 */
struct pidns_init_store {
	ino_t ino;			/* inode number for /proc/$pid/ns/pid */
	pid_t initpid;			/* the pid of init in that ns */
	long int ctime;			/* the time at which /proc/$initpid was created */
	struct pidns_init_store *next;	/* next entry in the same hash chain */
	long int lastcheck;		/* time(NULL) of the last save or successful lookup */
};
83
6d1308cb
SH
/*
 * Hash table for the pidns init cache.  Each slot heads a singly linked
 * chain (struct pidns_init_store.next) of entries whose namespace inode
 * number maps to that slot.  (Original note: "look at how they are
 * allocated in the kernel".)
 */
#define PIDNS_HASH_SIZE 4096
#define HASH(x) ((x) % PIDNS_HASH_SIZE)

struct pidns_init_store *pidns_hash_table[PIDNS_HASH_SIZE];
b11c6ec0
SH
/* Serialises every access to pidns_hash_table. */
static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Lock @l.  A locking failure is treated as fatal: the error is reported on
 * stderr and the process exits with status 1.
 */
static void lock_mutex(pthread_mutex_t *l)
{
	int err = pthread_mutex_lock(l);

	if (err == 0)
		return;

	fprintf(stderr, "pthread_mutex_lock returned:%d %s\n", err, strerror(err));
	exit(1);
}
2c51f8dd 99
b11c6ec0
SH
/*
 * Unlock @l.  An unlocking failure is treated as fatal: the error is
 * reported on stderr and the process exits with status 1.
 */
static void unlock_mutex(pthread_mutex_t *l)
{
	int err = pthread_mutex_unlock(l);

	if (err == 0)
		return;

	fprintf(stderr, "pthread_mutex_unlock returned:%d %s\n", err, strerror(err));
	exit(1);
}
109
b11c6ec0
SH
110static void store_lock(void)
111{
112 lock_mutex(&pidns_store_mutex);
113}
114
115static void store_unlock(void)
116{
117 unlock_mutex(&pidns_store_mutex);
118}
119
120/* Must be called under store_lock */
121static bool initpid_still_valid(struct pidns_init_store *e, struct stat *nsfdsb)
122{
123 struct stat initsb;
124 char fnam[100];
125
126 snprintf(fnam, 100, "/proc/%d", e->initpid);
127 if (stat(fnam, &initsb) < 0)
128 return false;
129#if DEBUG
130 fprintf(stderr, "comparing ctime %ld %ld for pid %d\n",
131 e->ctime, initsb.st_ctime, e->initpid);
132#endif
133 if (e->ctime != initsb.st_ctime)
134 return false;
135 return true;
136}
137
138/* Must be called under store_lock */
139static void remove_initpid(struct pidns_init_store *e)
140{
141 struct pidns_init_store *tmp;
6d1308cb 142 int h;
b11c6ec0
SH
143
144#if DEBUG
145 fprintf(stderr, "remove_initpid: removing entry for %d\n", e->initpid);
146#endif
6d1308cb
SH
147 h = HASH(e->ino);
148 if (pidns_hash_table[h] == e) {
149 pidns_hash_table[h] = e->next;
b11c6ec0
SH
150 free(e);
151 return;
152 }
153
6d1308cb 154 tmp = pidns_hash_table[h];
b11c6ec0
SH
155 while (tmp) {
156 if (tmp->next == e) {
157 tmp->next = e->next;
158 free(e);
159 return;
160 }
161 tmp = tmp->next;
162 }
163}
164
8e547050
SH
165#define PURGE_SECS 5
166/* Must be called under store_lock */
167static void prune_initpid_store(void)
168{
169 static long int last_prune = 0;
170 struct pidns_init_store *e, *prev, *delme;
171 long int now, threshold;
6d1308cb 172 int i;
8e547050
SH
173
174 if (!last_prune) {
175 last_prune = time(NULL);
176 return;
177 }
178 now = time(NULL);
179 if (now < last_prune + PURGE_SECS)
180 return;
181#if DEBUG
182 fprintf(stderr, "pruning\n");
183#endif
184 last_prune = now;
185 threshold = now - 2 * PURGE_SECS;
186
6d1308cb
SH
187 for (i = 0; i < PIDNS_HASH_SIZE; i++) {
188 for (prev = NULL, e = pidns_hash_table[i]; e; ) {
189 if (e->lastcheck < threshold) {
8e547050 190#if DEBUG
6d1308cb 191 fprintf(stderr, "Removing cached entry for %d\n", e->initpid);
8e547050 192#endif
6d1308cb
SH
193 delme = e;
194 if (prev)
195 prev->next = e->next;
196 else
197 pidns_hash_table[i] = e->next;
198 e = e->next;
199 free(delme);
200 } else {
201 prev = e;
202 e = e->next;
203 }
8e547050
SH
204 }
205 }
206}
207
b11c6ec0
SH
208/* Must be called under store_lock */
209static void save_initpid(struct stat *sb, pid_t pid)
210{
211 struct pidns_init_store *e;
212 char fpath[100];
213 struct stat procsb;
6d1308cb 214 int h;
b11c6ec0
SH
215
216#if DEBUG
217 fprintf(stderr, "save_initpid: adding entry for %d\n", pid);
218#endif
219 snprintf(fpath, 100, "/proc/%d", pid);
220 if (stat(fpath, &procsb) < 0)
221 return;
222 do {
223 e = malloc(sizeof(*e));
224 } while (!e);
225 e->ino = sb->st_ino;
226 e->initpid = pid;
227 e->ctime = procsb.st_ctime;
6d1308cb
SH
228 h = HASH(e->ino);
229 e->next = pidns_hash_table[h];
8e547050 230 e->lastcheck = time(NULL);
6d1308cb 231 pidns_hash_table[h] = e;
b11c6ec0
SH
232}
233
234/*
235 * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
236 * entry for the inode number and creation time. Verify that the init pid
237 * is still valid. If not, remove it. Return the entry if valid, NULL
238 * otherwise.
239 * Must be called under store_lock
240 */
241static struct pidns_init_store *lookup_verify_initpid(struct stat *sb)
242{
6d1308cb
SH
243 int h = HASH(sb->st_ino);
244 struct pidns_init_store *e = pidns_hash_table[h];
245
b11c6ec0
SH
246 while (e) {
247 if (e->ino == sb->st_ino) {
8e547050
SH
248 if (initpid_still_valid(e, sb)) {
249 e->lastcheck = time(NULL);
b11c6ec0 250 return e;
8e547050 251 }
b11c6ec0
SH
252 remove_initpid(e);
253 return NULL;
254 }
255 e = e->next;
256 }
257
258 return NULL;
259}
260
/*
 * Result codes of send_creds().  NOTE(review): the meaning of
 * SEND_CREDS_NOTSK is not visible in this part of the file - confirm
 * against the send_creds() body.
 */
#define SEND_CREDS_OK 0
#define SEND_CREDS_NOTSK 1
#define SEND_CREDS_FAIL 2

/* Forward declarations for helpers defined further down in this file. */
static bool recv_creds(int sock, struct ucred *cred, char *v);
static int wait_for_pid(pid_t pid);
static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst);
267
268/*
269 * fork a task which switches to @task's namespace and writes '1'.
270 * over a unix sock so we can read the task's reaper's pid in our
271 * namespace
272 */
273static void write_task_init_pid_exit(int sock, pid_t target)
274{
275 struct ucred cred;
276 char fnam[100];
277 pid_t pid;
278 char v;
279 int fd, ret;
280
281 ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", (int)target);
282 if (ret < 0 || ret >= sizeof(fnam))
283 _exit(1);
284
285 fd = open(fnam, O_RDONLY);
286 if (fd < 0) {
287 perror("write_task_init_pid_exit open of ns/pid");
288 _exit(1);
289 }
290 if (setns(fd, 0)) {
291 perror("write_task_init_pid_exit setns 1");
292 close(fd);
293 _exit(1);
294 }
295 pid = fork();
296 if (pid < 0)
297 _exit(1);
298 if (pid != 0) {
299 if (!wait_for_pid(pid))
300 _exit(1);
301 _exit(0);
302 }
303
304 /* we are the child */
305 cred.uid = 0;
306 cred.gid = 0;
307 cred.pid = 1;
308 v = '1';
309 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK)
310 _exit(1);
311 _exit(0);
312}
313
/*
 * Determine which pid (as seen from our namespace) is init of @task's pid
 * namespace.  A helper is forked to run write_task_init_pid_exit() on one
 * end of a datagram socketpair; the pid is taken from the credentials
 * received on the other end.
 *
 * Returns the pid, or -1 when the socketpair, the fork or the credential
 * receive fails.  The helper is always reaped before returning.
 */
static pid_t get_init_pid_for_task(pid_t task)
{
	struct ucred cred;
	pid_t helper;
	pid_t initpid = -1;
	char v = '0';
	int sock[2];

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		perror("socketpair");
		return -1;
	}

	helper = fork();
	if (helper == 0) {
		close(sock[1]);
		write_task_init_pid_exit(sock[0], task);
		_exit(0);
	}

	if (helper > 0 && recv_creds(sock[1], &cred, &v))
		initpid = cred.pid;

	close(sock[0]);
	close(sock[1]);
	if (helper > 0)
		wait_for_pid(helper);
	return initpid;
}
347
348static pid_t lookup_initpid_in_store(pid_t qpid)
349{
350 pid_t answer = 0;
351 struct stat sb;
352 struct pidns_init_store *e;
353 char fnam[100];
354
355 snprintf(fnam, 100, "/proc/%d/ns/pid", qpid);
356 store_lock();
357 if (stat(fnam, &sb) < 0)
358 goto out;
359 e = lookup_verify_initpid(&sb);
360 if (e) {
361 answer = e->initpid;
362 goto out;
363 }
364 answer = get_init_pid_for_task(qpid);
365 if (answer > 0)
366 save_initpid(&sb, answer);
367
368out:
8e547050
SH
369 /* we prune at end in case we are returning
370 * the value we were about to return */
371 prune_initpid_store();
b11c6ec0
SH
372 store_unlock();
373 return answer;
374}
0afd85bd 375
a05660a6
SH
/*
 * Reap child @pid, retrying when waitpid() is interrupted by a signal.
 *
 * Returns 0 only when the child exited normally with status 0.  Returns -1
 * when @pid is not positive, when waitpid() fails for a reason other than
 * EINTR, or when the child was killed or exited with a non-zero status.
 */
static int wait_for_pid(pid_t pid)
{
	int status;
	pid_t reaped;

	if (pid <= 0)
		return -1;

	for (;;) {
		reaped = waitpid(pid, &status, 0);
		if (reaped == pid)
			break;
		if (reaped == -1 && errno != EINTR)
			return -1;
	}

	return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
}
396
b11c6ec0
SH
397
398/*
399 * append pid to *src.
400 * src: a pointer to a char* in which ot append the pid.
401 * sz: the number of characters printed so far, minus trailing \0.
402 * asz: the allocated size so far
403 * pid: the pid to append
404 */
405static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
406{
407 char tmp[30];
408
409 int tmplen = sprintf(tmp, "%d\n", (int)pid);
410
411 if (!*src || tmplen + *sz + 1 >= *asz) {
412 char *tmp;
413 do {
414 tmp = realloc(*src, *asz + BUF_RESERVE_SIZE);
415 } while (!tmp);
416 *src = tmp;
417 *asz += BUF_RESERVE_SIZE;
418 }
419 memcpy((*src) +*sz , tmp, tmplen);
420 *sz += tmplen;
421 (*src)[*sz] = '\0';
422}
423
053a659d
SH
/*
 * Given an open FILE * for /proc/pid/{u,g}id_map and an id valid in the
 * caller's namespace, return the id mapped into pid's namespace.
 *
 * The file is rewound first, so the same stream can be queried repeatedly.
 * Lines that do not parse as three unsigned numbers are skipped.
 *
 * Returns the mapped id, or -1 (i.e. UINT_MAX, the return type being
 * unsigned) when no range contains @in_id or a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	unsigned int ns_base,	/* first id of the range inside the namespace */
		     host_base,	/* first id of the range in the caller's namespace */
		     range;	/* number of ids in this range */
	char line[400];

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, sizeof(line), idfile)) {
		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range) != 3)
			continue;

		if (host_base + range < host_base || ns_base + range < ns_base) {
			/*
			 * ids wrapped around - unexpected as this is a
			 * procfile, so just bail.
			 */
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				ns_base, host_base, range, line);
			return -1;
		}

		if (in_id < host_base || in_id >= host_base + range)
			continue;

		/*
		 * host_base <= in_id < host_base + range and neither sum
		 * wraps, so ns_base + (in_id - host_base) cannot wrap either.
		 */
		return ns_base + (in_id - host_base);
	}

	/* no answer found */
	return -1;
}
467
341b21ad
SH
/*
 * for is_privileged_over,
 * specify whether we require the calling uid to be root in his
 * namespace
 */
#define NS_ROOT_REQD true
#define NS_ROOT_OPT false

/* size of the buffers used to build /proc/<pid>/... paths */
#define PROCLEN 100

/*
 * Decide whether @uid, as seen through the user namespace of @pid, has
 * privilege over files owned by @victim.
 *
 * - (uid_t)-1 for either id is always rejected.
 * - With req_ns_root == NS_ROOT_OPT, uid == victim is sufficient (uid 1000
 *   has access to files owned by uid 1000).
 * - Otherwise @uid must map to 0 in /proc/<pid>/uid_map and @victim must
 *   be mapped there at all.
 *
 * Returns false when the uid_map cannot be opened.
 */
static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
{
	char fpath[PROCLEN];
	bool answer;
	FILE *f;
	int ret;

	if (victim == (uid_t)-1 || uid == (uid_t)-1)
		return false;

	if (!req_ns_root && uid == victim)
		return true;

	ret = snprintf(fpath, PROCLEN, "/proc/%d/uid_map", pid);
	if (ret < 0 || ret >= PROCLEN)
		return false;

	f = fopen(fpath, "r");
	if (!f)
		return false;

	/*
	 * First operand: the caller must be root in his namespace.
	 * Second operand (only evaluated if the first holds): the victim
	 * must be mapped into the caller's ns.
	 * XXX I'm not sure the second check is needed given that fuse will
	 * be sending requests where the vfs has converted
	 */
	answer = convert_id_to_ns(f, uid) == 0 &&
		 convert_id_to_ns(f, victim) != (unsigned int)-1;

	fclose(f);
	return answer;
}
523
/*
 * Check whether the "other" permission bits of @fmode allow the access
 * requested by the open(2) access mode in @req_mode (only the O_ACCMODE
 * bits are considered).  Callers shift the owner or group bits down into
 * the "other" position before calling.
 *
 * Returns false for an access mode that is none of O_RDONLY, O_WRONLY,
 * O_RDWR.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
543
a8b6c3e0
SH
544
/*
 * Return the first path component of @taskcg that follows the @querycg
 * prefix, as a newly allocated string the caller must free.
 *
 *   taskcg  is a/b/c/d/e
 *   querycg is a/b/c
 *   we return "d"
 *
 * When @querycg is "/", the first component of @taskcg after its leading
 * character is returned.  @taskcg must be strictly longer than @querycg;
 * otherwise a diagnostic is printed and NULL is returned.  NULL is also
 * returned when the allocation fails.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t query_len = strlen(querycg);
	size_t skip;
	char *next, *slash;

	if (strlen(taskcg) <= query_len) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* skip the query prefix plus the '/' that separates it from the rest */
	skip = (strcmp(querycg, "/") == 0) ? 1 : query_len + 1;
	next = strdup(taskcg + skip);
	if (!next)
		return NULL;

	slash = strchr(next, '/');
	if (slash)
		*slash = '\0';
	return next;
}
570
2c51f8dd
SH
/* Remove one trailing '\n' from @x in place, if there is one. */
static void stripnewline(char *x)
{
	size_t len = strlen(x);

	if (len == 0)
		return;
	if (x[len - 1] == '\n')
		x[len - 1] = '\0';
}
577
578static char *get_pid_cgroup(pid_t pid, const char *contrl)
579{
580 char fnam[PROCLEN];
581 FILE *f;
582 char *answer = NULL;
583 char *line = NULL;
584 size_t len = 0;
585 int ret;
777dd831
SH
586 const char *h = find_mounted_controller(contrl);
587 if (!h)
588 return NULL;
2c51f8dd
SH
589
590 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
591 if (ret < 0 || ret >= PROCLEN)
592 return NULL;
593 if (!(f = fopen(fnam, "r")))
594 return NULL;
595
596 while (getline(&line, &len, f) != -1) {
597 char *c1, *c2;
598 if (!line[0])
599 continue;
600 c1 = strchr(line, ':');
601 if (!c1)
602 goto out;
603 c1++;
604 c2 = strchr(c1, ':');
605 if (!c2)
606 goto out;
607 *c2 = '\0';
777dd831 608 if (strcmp(c1, h) != 0)
2c51f8dd
SH
609 continue;
610 c2++;
611 stripnewline(c2);
612 do {
613 answer = strdup(c2);
614 } while (!answer);
615 break;
616 }
617
618out:
619 fclose(f);
620 free(line);
621 return answer;
622}
623
758ad80c
SH
624/*
625 * check whether a fuse context may access a cgroup dir or file
626 *
627 * If file is not null, it is a cgroup file to check under cg.
628 * If file is null, then we are checking perms on cg itself.
629 *
630 * For files we can check the mode of the list_keys result.
631 * For cgroups, we must make assumptions based on the files under the
632 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
633 * yet.
634 */
635static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
636{
35482f91 637 struct cgfs_files *k = NULL;
2c51f8dd 638 bool ret = false;
758ad80c 639
35482f91
SH
640 k = cgfs_get_key(contrl, cg, file);
641 if (!k)
758ad80c 642 return false;
35482f91
SH
643
644 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
645 if (perms_include(k->mode >> 6, mode)) {
646 ret = true;
2c51f8dd 647 goto out;
758ad80c
SH
648 }
649 }
35482f91
SH
650 if (fc->gid == k->gid) {
651 if (perms_include(k->mode >> 3, mode)) {
652 ret = true;
653 goto out;
654 }
655 }
656 ret = perms_include(k->mode, mode);
758ad80c 657
2c51f8dd 658out:
35482f91 659 free_key(k);
2c51f8dd 660 return ret;
3db25a35
SH
661}
662
04b5cbdc
SH
#define INITSCOPE "/init.scope"
/*
 * Strip a trailing "/init.scope" from the cgroup path @cg, in place.
 * A path that is exactly "/init.scope" becomes "/"; paths that do not end
 * in "/init.scope" are left alone.
 */
static void prune_init_slice(char *cg)
{
	size_t cg_len = strlen(cg);
	size_t scope_len = strlen(INITSCOPE);
	char *tail;

	if (cg_len < scope_len)
		return;

	tail = cg + (cg_len - scope_len);
	if (strcmp(tail, INITSCOPE) != 0)
		return;

	/* keep the leading '/' when nothing precedes the suffix */
	if (tail == cg)
		tail++;
	*tail = '\0';
}
680
/*
 * If pid is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
 * If pid is in /a, he may act on /a/b, but not on /b.
 *
 * The test is a plain prefix comparison of the task's cgroup (with a
 * trailing /init.scope pruned) against @cg.
 *
 * If the answer is false and nextcg is not NULL, then *nextcg is set to the
 * result of get_next_cgroup_dir(): a string containing the next cgroup
 * directory under cg, which must be freed by the caller, or NULL.  *nextcg
 * is left untouched when the answer is true.
 *
 * Returns false when the task's cgroup cannot be determined.
 */
static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
{
	char *task_cg = get_pid_cgroup(pid, contrl);
	const char *linecmp;
	bool answer;

	if (!task_cg)
		return false;
	prune_init_slice(task_cg);

	/*
	 * callers pass in '/' for root cgroup, otherwise they pass
	 * in a cgroup without leading '/'
	 */
	linecmp = (*cg == '/') ? task_cg : task_cg + 1;

	answer = strncmp(linecmp, cg, strlen(linecmp)) == 0;
	if (!answer && nextcg)
		*nextcg = get_next_cgroup_dir(linecmp, cg);

	free(task_cg);
	return answer;
}
715
/*
 * If pid is in /a/b/c, he may see that /a exists, but not /b or /a/c.
 *
 * @cg is "/" (always visible) or a cgroup path without leading '/'.
 * The directory is visible when the task sits in the root cgroup, when @cg
 * is the task's own cgroup, or when one of the two paths is a
 * '/'-delimited prefix of the other (i.e. @cg is an ancestor or a
 * descendant of the task's cgroup).
 *
 * Returns false when the task's cgroup cannot be determined.
 */
static bool caller_may_see_dir(pid_t pid, const char *contrl, const char *cg)
{
	size_t target_len, task_len;
	bool answer = false;
	char *c2, *task_cg;

	if (strcmp(cg, "/") == 0)
		return true;

	c2 = get_pid_cgroup(pid, contrl);
	if (!c2)
		return false;
	prune_init_slice(c2);

	/* drop the leading '/' so that both paths have the same shape */
	task_cg = c2 + 1;
	target_len = strlen(cg);
	task_len = strlen(task_cg);

	if (task_len == 0) {
		/*
		 * Task is in the root cg, it can see everything.  This case
		 * is not handled by the comparisons below, since they test
		 * for a separating '/', but the only '/' was chopped off
		 * above.
		 */
		answer = true;
	} else if (strcmp(cg, task_cg) == 0) {
		answer = true;
	} else if (target_len < task_len) {
		/* looking up a parent dir */
		answer = strncmp(task_cg, cg, target_len) == 0 &&
			 task_cg[target_len] == '/';
	} else if (target_len > task_len) {
		/* looking up a child dir */
		answer = strncmp(task_cg, cg, task_len) == 0 &&
			 cg[task_len] == '/';
	}

	free(c2);
	return answer;
}
766
758ad80c 767/*
2c51f8dd
SH
768 * given /cgroup/freezer/a/b, return "freezer".
769 * the returned char* should NOT be freed.
758ad80c
SH
770 */
771static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
772{
773 const char *p1;
2c51f8dd 774 char *contr, *slash;
758ad80c
SH
775
776 if (strlen(path) < 9)
777 return NULL;
ac5d9d48
SH
778 if (*(path+7) != '/')
779 return NULL;
758ad80c 780 p1 = path+8;
2c51f8dd
SH
781 contr = strdupa(p1);
782 if (!contr)
783 return NULL;
784 slash = strstr(contr, "/");
758ad80c
SH
785 if (slash)
786 *slash = '\0';
787
758ad80c 788 int i;
35482f91
SH
789 for (i = 0; i < num_hierarchies; i++) {
790 if (hierarchies[i] && strcmp(hierarchies[i], contr) == 0)
791 return hierarchies[i];
758ad80c 792 }
758ad80c
SH
793 return NULL;
794}
795
/*
 * Find the start of cgroup in /cgroup/controller/the/cgroup/path
 * Note that the returned value may include files (keynames) etc
 *
 * The result points into @path, just past the first '/' found at or after
 * offset 8.  Returns NULL when @path is shorter than 9 characters or has no
 * such '/'.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *slash;

	if (strlen(path) < 9)
		return NULL;

	slash = strchr(path + 8, '/');
	return slash ? slash + 1 : NULL;
}
811
/*
 * split the last path element from the path in @cg.
 *
 * @dir receives a newly allocated copy of @cg truncated at its last '/'
 * (the whole of @cg when it contains no '/'); it should be freed by the
 * caller.  The copy is retried until it succeeds.
 * @last receives a pointer into @cg at its last '/' (so it still starts
 * with that '/'), or NULL when @cg contains no '/'; it must not be freed.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **last)
{
	char *copy;

	do {
		copy = strdup(cg);
	} while (!copy);
	*dir = copy;

	*last = strrchr(cg, '/');
	if (*last)
		*strrchr(copy, '/') = '\0';
}
831
832/*
2ad6d2bd 833 * FUSE ops for /cgroup
758ad80c 834 */
2ad6d2bd 835
758ad80c
SH
836static int cg_getattr(const char *path, struct stat *sb)
837{
838 struct timespec now;
839 struct fuse_context *fc = fuse_get_context();
2c51f8dd 840 char * cgdir = NULL;
febf2b87 841 char *last = NULL, *path1, *path2;
35482f91 842 struct cgfs_files *k = NULL;
758ad80c 843 const char *cgroup;
2c51f8dd
SH
844 const char *controller = NULL;
845 int ret = -ENOENT;
758ad80c
SH
846
847
848 if (!fc)
849 return -EIO;
850
851 memset(sb, 0, sizeof(struct stat));
852
853 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
854 return -EINVAL;
855
856 sb->st_uid = sb->st_gid = 0;
857 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
858 sb->st_size = 0;
859
860 if (strcmp(path, "/cgroup") == 0) {
861 sb->st_mode = S_IFDIR | 00755;
862 sb->st_nlink = 2;
863 return 0;
864 }
865
866 controller = pick_controller_from_path(fc, path);
867 if (!controller)
868 return -EIO;
758ad80c
SH
869 cgroup = find_cgroup_in_path(path);
870 if (!cgroup) {
871 /* this is just /cgroup/controller, return it as a dir */
872 sb->st_mode = S_IFDIR | 00755;
873 sb->st_nlink = 2;
874 return 0;
875 }
341b21ad 876
febf2b87 877 get_cgdir_and_path(cgroup, &cgdir, &last);
758ad80c 878
febf2b87 879 if (!last) {
758ad80c
SH
880 path1 = "/";
881 path2 = cgdir;
882 } else {
883 path1 = cgdir;
febf2b87 884 path2 = last;
758ad80c
SH
885 }
886
b11c6ec0 887 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
888 if (initpid <= 0)
889 initpid = fc->pid;
758ad80c 890 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
febf2b87
SH
891 * Then check that caller's cgroup is under path if last is a child
892 * cgroup, or cgdir if last is a file */
758ad80c
SH
893
894 if (is_child_cgroup(controller, path1, path2)) {
0dcc31ea 895 if (!caller_may_see_dir(initpid, controller, cgroup)) {
a8b6c3e0
SH
896 ret = -ENOENT;
897 goto out;
898 }
0dcc31ea 899 if (!caller_is_in_ancestor(initpid, controller, cgroup, NULL)) {
f9a05025
SH
900 /* this is just /cgroup/controller, return it as a dir */
901 sb->st_mode = S_IFDIR | 00555;
902 sb->st_nlink = 2;
2c51f8dd
SH
903 ret = 0;
904 goto out;
905 }
906 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
907 ret = -EACCES;
908 goto out;
f9a05025 909 }
758ad80c 910
053a659d
SH
911 // get uid, gid, from '/tasks' file and make up a mode
912 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
913 sb->st_mode = S_IFDIR | 00755;
febf2b87 914 k = cgfs_get_key(controller, cgroup, NULL);
053a659d 915 if (!k) {
053a659d
SH
916 sb->st_uid = sb->st_gid = 0;
917 } else {
053a659d
SH
918 sb->st_uid = k->uid;
919 sb->st_gid = k->gid;
920 }
2c51f8dd 921 free_key(k);
758ad80c 922 sb->st_nlink = 2;
2c51f8dd
SH
923 ret = 0;
924 goto out;
758ad80c
SH
925 }
926
35482f91 927 if ((k = cgfs_get_key(controller, path1, path2)) != NULL) {
758ad80c 928 sb->st_mode = S_IFREG | k->mode;
053a659d 929 sb->st_nlink = 1;
758ad80c
SH
930 sb->st_uid = k->uid;
931 sb->st_gid = k->gid;
7253e0a4 932 sb->st_size = 0;
2c51f8dd 933 free_key(k);
0dcc31ea 934 if (!caller_is_in_ancestor(initpid, controller, path1, NULL)) {
adc3867b
SH
935 ret = -ENOENT;
936 goto out;
937 }
938 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) {
939 ret = -EACCES;
940 goto out;
941 }
2c51f8dd
SH
942
943 ret = 0;
758ad80c
SH
944 }
945
2c51f8dd
SH
946out:
947 free(cgdir);
948 return ret;
758ad80c 949}
2183082c 950
758ad80c 951static int cg_opendir(const char *path, struct fuse_file_info *fi)
2183082c 952{
7f163b71 953 struct fuse_context *fc = fuse_get_context();
7f163b71 954 const char *cgroup;
c688e1b3 955 struct file_info *dir_info;
2c51f8dd 956 char *controller = NULL;
7f163b71
SH
957
958 if (!fc)
959 return -EIO;
960
c688e1b3
SH
961 if (strcmp(path, "/cgroup") == 0) {
962 cgroup = NULL;
963 controller = NULL;
964 } else {
965 // return list of keys for the controller, and list of child cgroups
966 controller = pick_controller_from_path(fc, path);
967 if (!controller)
968 return -EIO;
7f163b71 969
c688e1b3
SH
970 cgroup = find_cgroup_in_path(path);
971 if (!cgroup) {
972 /* this is just /cgroup/controller, return its contents */
973 cgroup = "/";
974 }
7f163b71
SH
975 }
976
b11c6ec0 977 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
978 if (initpid <= 0)
979 initpid = fc->pid;
a8b6c3e0 980 if (cgroup) {
0dcc31ea 981 if (!caller_may_see_dir(initpid, controller, cgroup))
a8b6c3e0
SH
982 return -ENOENT;
983 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
984 return -EACCES;
2c51f8dd 985 }
c688e1b3
SH
986
987 /* we'll free this at cg_releasedir */
2c51f8dd
SH
988 dir_info = malloc(sizeof(*dir_info));
989 if (!dir_info)
990 return -ENOMEM;
35482f91
SH
991 dir_info->controller = must_copy_string(controller);
992 dir_info->cgroup = must_copy_string(cgroup);
443d13f5 993 dir_info->type = LXC_TYPE_CGDIR;
c688e1b3 994 dir_info->buf = NULL;
8f6e8f5e 995 dir_info->file = NULL;
c688e1b3
SH
996 dir_info->buflen = 0;
997
998 fi->fh = (unsigned long)dir_info;
758ad80c
SH
999 return 0;
1000}
1001
758ad80c
SH
1002static int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
1003 struct fuse_file_info *fi)
1004{
c688e1b3 1005 struct file_info *d = (struct file_info *)fi->fh;
35482f91 1006 struct cgfs_files **list = NULL;
2c51f8dd
SH
1007 int i, ret;
1008 char *nextcg = NULL;
758ad80c 1009 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1010 char **clist = NULL;
758ad80c 1011
443d13f5 1012 if (d->type != LXC_TYPE_CGDIR) {
b845ad01
SH
1013 fprintf(stderr, "Internal error: file cache info used in readdir\n");
1014 return -EIO;
1015 }
c688e1b3
SH
1016 if (!d->cgroup && !d->controller) {
1017 // ls /var/lib/lxcfs/cgroup - just show list of controllers
758ad80c
SH
1018 int i;
1019
35482f91
SH
1020 for (i = 0; i < num_hierarchies; i++) {
1021 if (hierarchies[i] && filler(buf, hierarchies[i], NULL, 0) != 0) {
758ad80c
SH
1022 return -EIO;
1023 }
1024 }
1025 return 0;
1026 }
1027
35482f91 1028 if (!cgfs_list_keys(d->controller, d->cgroup, &list)) {
3db25a35 1029 // not a valid cgroup
2c51f8dd
SH
1030 ret = -EINVAL;
1031 goto out;
1032 }
3db25a35 1033
b11c6ec0 1034 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
1035 if (initpid <= 0)
1036 initpid = fc->pid;
0dcc31ea 1037 if (!caller_is_in_ancestor(initpid, d->controller, d->cgroup, &nextcg)) {
3db25a35
SH
1038 if (nextcg) {
1039 int ret;
1040 ret = filler(buf, nextcg, NULL, 0);
2c51f8dd
SH
1041 free(nextcg);
1042 if (ret != 0) {
1043 ret = -EIO;
1044 goto out;
1045 }
3db25a35 1046 }
2c51f8dd
SH
1047 ret = 0;
1048 goto out;
3db25a35
SH
1049 }
1050
758ad80c 1051 for (i = 0; list[i]; i++) {
758ad80c 1052 if (filler(buf, list[i]->name, NULL, 0) != 0) {
2c51f8dd
SH
1053 ret = -EIO;
1054 goto out;
758ad80c
SH
1055 }
1056 }
1057
1058 // now get the list of child cgroups
758ad80c 1059
35482f91 1060 if (!cgfs_list_children(d->controller, d->cgroup, &clist)) {
2c51f8dd
SH
1061 ret = 0;
1062 goto out;
1063 }
758ad80c 1064 for (i = 0; clist[i]; i++) {
758ad80c 1065 if (filler(buf, clist[i], NULL, 0) != 0) {
2c51f8dd
SH
1066 ret = -EIO;
1067 goto out;
758ad80c
SH
1068 }
1069 }
2c51f8dd
SH
1070 ret = 0;
1071
1072out:
1073 free_keys(list);
1074 if (clist) {
1075 for (i = 0; clist[i]; i++)
1076 free(clist[i]);
1077 free(clist);
1078 }
1079 return ret;
758ad80c
SH
1080}
1081
8f6e8f5e
SH
1082static void do_release_file_info(struct file_info *f)
1083{
2c51f8dd
SH
1084 if (!f)
1085 return;
1086 free(f->controller);
1087 free(f->cgroup);
1088 free(f->file);
1089 free(f->buf);
1090 free(f);
8f6e8f5e
SH
1091}
1092
758ad80c
SH
1093static int cg_releasedir(const char *path, struct fuse_file_info *fi)
1094{
c688e1b3
SH
1095 struct file_info *d = (struct file_info *)fi->fh;
1096
8f6e8f5e 1097 do_release_file_info(d);
758ad80c
SH
1098 return 0;
1099}
1100
99978832
SH
1101static int cg_open(const char *path, struct fuse_file_info *fi)
1102{
99978832 1103 const char *cgroup;
febf2b87 1104 char *last = NULL, *path1, *path2, * cgdir = NULL, *controller;
35482f91 1105 struct cgfs_files *k = NULL;
8f6e8f5e 1106 struct file_info *file_info;
99978832 1107 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1108 int ret;
99978832
SH
1109
1110 if (!fc)
1111 return -EIO;
1112
1113 controller = pick_controller_from_path(fc, path);
1114 if (!controller)
1115 return -EIO;
1116 cgroup = find_cgroup_in_path(path);
1117 if (!cgroup)
1118 return -EINVAL;
1119
febf2b87
SH
1120 get_cgdir_and_path(cgroup, &cgdir, &last);
1121 if (!last) {
99978832
SH
1122 path1 = "/";
1123 path2 = cgdir;
1124 } else {
1125 path1 = cgdir;
febf2b87 1126 path2 = last;
99978832
SH
1127 }
1128
35482f91 1129 k = cgfs_get_key(controller, path1, path2);
2c51f8dd
SH
1130 if (!k) {
1131 ret = -EINVAL;
1132 goto out;
1133 }
1134 free_key(k);
99978832 1135
b11c6ec0 1136 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
1137 if (initpid <= 0)
1138 initpid = fc->pid;
0dcc31ea 1139 if (!caller_may_see_dir(initpid, controller, path1)) {
a8b6c3e0
SH
1140 ret = -ENOENT;
1141 goto out;
1142 }
2c51f8dd 1143 if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
8f6e8f5e 1144 // should never get here
2c51f8dd
SH
1145 ret = -EACCES;
1146 goto out;
1147 }
99978832 1148
8f6e8f5e 1149 /* we'll free this at cg_release */
2c51f8dd
SH
1150 file_info = malloc(sizeof(*file_info));
1151 if (!file_info) {
1152 ret = -ENOMEM;
1153 goto out;
1154 }
35482f91
SH
1155 file_info->controller = must_copy_string(controller);
1156 file_info->cgroup = must_copy_string(path1);
1157 file_info->file = must_copy_string(path2);
443d13f5 1158 file_info->type = LXC_TYPE_CGFILE;
8f6e8f5e
SH
1159 file_info->buf = NULL;
1160 file_info->buflen = 0;
1161
1162 fi->fh = (unsigned long)file_info;
2c51f8dd
SH
1163 ret = 0;
1164
1165out:
1166 free(cgdir);
1167 return ret;
8f6e8f5e
SH
1168}
1169
1170static int cg_release(const char *path, struct fuse_file_info *fi)
1171{
1172 struct file_info *f = (struct file_info *)fi->fh;
1173
1174 do_release_file_info(f);
1175 return 0;
99978832
SH
1176}
1177
5b2dfd85
SH
#define POLLIN_SET ( EPOLLIN | EPOLLHUP | EPOLLRDHUP )

/*
 * Wait until @sock reports one of the POLLIN_SET events or roughly
 * @timeout seconds have elapsed since the call started.  Interrupted waits
 * are restarted with the remaining time.
 *
 * Returns true when an event was reported.  Returns false on timeout or on
 * any error; when the deadline is found to have passed before waiting,
 * errno is set to 0, otherwise errno is whatever it was right after
 * epoll_wait().
 */
static bool wait_for_sock(int sock, int timeout)
{
	struct epoll_event ev;
	int epfd, nready, now, starttime, remaining, saved_errno;

	starttime = time(NULL);
	if (starttime < 0)
		return false;

	epfd = epoll_create(1);
	if (epfd < 0) {
		fprintf(stderr, "Failed to create epoll socket: %m\n");
		return false;
	}

	ev.events = POLLIN_SET;
	ev.data.fd = sock;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, sock, &ev) < 0) {
		fprintf(stderr, "Failed adding socket to epoll: %m\n");
		close(epfd);
		return false;
	}

	do {
		now = time(NULL);
		if (now < 0) {
			close(epfd);
			return false;
		}

		remaining = (starttime + timeout) - now;
		if (remaining < 0) { // timeout
			errno = 0;
			close(epfd);
			return false;
		}
		/* epoll_wait() takes milliseconds */
		nready = epoll_wait(epfd, &ev, 1, 1000 * remaining + 1);
	} while (nready < 0 && errno == EINTR);

	/* close() may clobber errno, so keep epoll_wait()'s value around */
	saved_errno = errno;
	close(epfd);

	if (nready <= 0) {
		errno = saved_errno;
		return false;
	}
	return true;
}
a05660a6 1225
5b2dfd85
SH
/*
 * Receive up to @len bytes from @sockfd into @buf.  The socket is first
 * polled with wait_for_sock() using a 2 second budget; if that fails -1 is
 * returned, otherwise the result of a non-blocking recv() is returned.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	bool ready = wait_for_sock(sockfd, 2);

	if (ready)
		return recv(sockfd, buf, len, MSG_DONTWAIT);
	return -1;
}
1232
01e71852 1233static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
a05660a6
SH
1234{
1235 struct msghdr msg = { 0 };
1236 struct iovec iov;
1237 struct cmsghdr *cmsg;
1238 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
1239 char buf[1];
1240 buf[0] = 'p';
1241
01e71852
SH
1242 if (pingfirst) {
1243 if (msgrecv(sock, buf, 1) != 1) {
1420baf8 1244 fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
01e71852
SH
1245 __func__);
1246 return SEND_CREDS_FAIL;
1247 }
a05660a6
SH
1248 }
1249
1250 msg.msg_control = cmsgbuf;
1251 msg.msg_controllen = sizeof(cmsgbuf);
1252
1253 cmsg = CMSG_FIRSTHDR(&msg);
1254 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
1255 cmsg->cmsg_level = SOL_SOCKET;
1256 cmsg->cmsg_type = SCM_CREDENTIALS;
1257 memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));
1258
1259 msg.msg_name = NULL;
1260 msg.msg_namelen = 0;
1261
1262 buf[0] = v;
1263 iov.iov_base = buf;
1264 iov.iov_len = sizeof(buf);
1265 msg.msg_iov = &iov;
1266 msg.msg_iovlen = 1;
1267
1268 if (sendmsg(sock, &msg, 0) < 0) {
1420baf8 1269 fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
a05660a6
SH
1270 strerror(errno));
1271 if (errno == 3)
01e71852
SH
1272 return SEND_CREDS_NOTSK;
1273 return SEND_CREDS_FAIL;
a05660a6
SH
1274 }
1275
01e71852 1276 return SEND_CREDS_OK;
a05660a6
SH
1277}
1278
/*
 * Receive one credentials message from @sock.
 *
 * SO_PASSCRED is enabled on the socket, a single '1' byte is written to the
 * peer, and then (after waiting up to 2 seconds for readability) one message
 * is read with recvmsg().  If that message carries an SCM_CREDENTIALS control
 * message of the expected size it is copied into @cred; otherwise @cred keeps
 * the -1 values set on entry.  The payload byte is stored in *@v.
 *
 * Returns false on any setsockopt/write/wait/recvmsg failure, true otherwise.
 */
static bool recv_creds(int sock, struct ucred *cred, char *v)
{
	char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
	char buf[1];
	struct msghdr msg = { 0 };
	struct iovec iov;
	struct cmsghdr *cmsg;
	int optval = 1;
	int ret;

	*v = '1';

	cred->pid = -1;
	cred->uid = -1;
	cred->gid = -1;

	if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
		fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
		return false;
	}

	buf[0] = '1';
	if (write(sock, buf, 1) != 1) {
		fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
		return false;
	}

	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_control = cmsgbuf;
	msg.msg_controllen = sizeof(cmsgbuf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	if (!wait_for_sock(sock, 2)) {
		fprintf(stderr, "Timed out waiting for scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	ret = recvmsg(sock, &msg, MSG_DONTWAIT);
	if (ret < 0) {
		fprintf(stderr, "Failed to receive scm_cred: %s\n",
			  strerror(errno));
		return false;
	}

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg != NULL &&
	    cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
	    cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_CREDENTIALS)
		memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));

	*v = buf[0];
	return true;
}
1338
1339
/*
 * pid_to_ns - repeatedly receives a ucred over @sock and writes the pid it
 * contains back over the same socket as a raw pid_t.  Because the credentials
 * are received in this process's pid namespace, this shifts the pid from the
 * sender's pidns into tpid's pidns.
 *
 * Never returns: exits 0 when a message with payload '1' arrives or when
 * recv_creds() fails, and exits 1 when the reply cannot be written.
 */
static void pid_to_ns(int sock, pid_t tpid)
{
	struct ucred cred;
	char v = '0';

	for (;;) {
		if (!recv_creds(sock, &cred, &v))
			break;
		if (v == '1')
			_exit(0);
		if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
			_exit(1);
	}
	_exit(0);
}
1358
/*
 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
 * in your old pidns.  Only children which you fork will be in the target
 * pidns.  So the pid_to_ns_wrapper does the setns, then forks a child to
 * actually convert pids.
 *
 * The child acknowledges that it is running by writing '1' to a pipe before
 * entering pid_to_ns().  This function never returns: it exits 0 when the ack
 * was received and wait_for_pid() reports success for the child, and exits 1
 * on any failure.
 */
static void pid_to_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

	cpid = fork();
	if (cpid < 0)
		_exit(1);

	if (!cpid) {
		char b = '1';
		close(cpipe[0]);
		if (write(cpipe[1], &b, sizeof(char)) < 0) {
			fprintf(stderr, "%s (child): error on write: %s\n",
				__func__, strerror(errno));
		}
		close(cpipe[1]);
		pid_to_ns(sock, tpid);
		_exit(1); // not reached
	}

	// Only the child writes the ack.  Dropping our copy of the write end
	// lets a child that died before acking show up as EOF on the read
	// end instead of having to wait out the timeout below.
	close(cpipe[1]);

	// give the child 1 second to be done forking and
	// write its ack
	if (!wait_for_sock(cpipe[0], 1))
		_exit(1);
	ret = read(cpipe[0], &v, 1);
	if (ret != sizeof(char) || v != '1')
		_exit(1);

	if (!wait_for_pid(cpid))
		_exit(1);
	_exit(0);
}
1412
1413/*
1414 * To read cgroup files with a particular pid, we will setns into the child
1415 * pidns, open a pipe, fork a child - which will be the first to really be in
35482f91 1416 * the child ns - which does the cgfs_get_value and writes the data to the pipe.
a05660a6
SH
1417 */
1418static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
1419{
1420 int sock[2] = {-1, -1};
2c51f8dd 1421 char *tmpdata = NULL;
a05660a6
SH
1422 int ret;
1423 pid_t qpid, cpid = -1;
1424 bool answer = false;
1425 char v = '0';
1426 struct ucred cred;
2c51f8dd 1427 size_t sz = 0, asz = 0;
a05660a6 1428
35482f91 1429 if (!cgfs_get_value(contrl, cg, file, &tmpdata))
a05660a6
SH
1430 return false;
1431
1432 /*
1433 * Now we read the pids from returned data one by one, pass
1434 * them into a child in the target namespace, read back the
1435 * translated pids, and put them into our to-return data
1436 */
1437
1438 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
1439 perror("socketpair");
2c51f8dd
SH
1440 free(tmpdata);
1441 return false;
a05660a6
SH
1442 }
1443
1444 cpid = fork();
1445 if (cpid == -1)
1446 goto out;
1447
ff96a5f9 1448 if (!cpid) // child - exits when done
4775fba1 1449 pid_to_ns_wrapper(sock[1], tpid);
a05660a6
SH
1450
1451 char *ptr = tmpdata;
1452 cred.uid = 0;
1453 cred.gid = 0;
1454 while (sscanf(ptr, "%d\n", &qpid) == 1) {
1455 cred.pid = qpid;
01e71852
SH
1456 ret = send_creds(sock[0], &cred, v, true);
1457
1458 if (ret == SEND_CREDS_NOTSK)
1459 goto next;
1460 if (ret == SEND_CREDS_FAIL)
a05660a6
SH
1461 goto out;
1462
1463 // read converted results
5b2dfd85
SH
1464 if (!wait_for_sock(sock[0], 2)) {
1465 fprintf(stderr, "%s: timed out waiting for pid from child: %s\n",
6ee867dc 1466 __func__, strerror(errno));
a05660a6
SH
1467 goto out;
1468 }
1469 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
6ee867dc
SH
1470 fprintf(stderr, "%s: error reading pid from child: %s\n",
1471 __func__, strerror(errno));
a05660a6
SH
1472 goto out;
1473 }
2c51f8dd 1474 must_strcat_pid(d, &sz, &asz, qpid);
01e71852 1475next:
a05660a6
SH
1476 ptr = strchr(ptr, '\n');
1477 if (!ptr)
1478 break;
1479 ptr++;
1480 }
1481
1482 cred.pid = getpid();
1483 v = '1';
01e71852 1484 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
a05660a6 1485 // failed to ask child to exit
6ee867dc
SH
1486 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
1487 __func__, strerror(errno));
a05660a6
SH
1488 goto out;
1489 }
1490
1491 answer = true;
1492
1493out:
2c51f8dd 1494 free(tmpdata);
a05660a6
SH
1495 if (cpid != -1)
1496 wait_for_pid(cpid);
1497 if (sock[0] != -1) {
1498 close(sock[0]);
1499 close(sock[1]);
1500 }
1501 return answer;
1502}
1503
99978832
SH
1504static int cg_read(const char *path, char *buf, size_t size, off_t offset,
1505 struct fuse_file_info *fi)
1506{
99978832 1507 struct fuse_context *fc = fuse_get_context();
8f6e8f5e 1508 struct file_info *f = (struct file_info *)fi->fh;
35482f91 1509 struct cgfs_files *k = NULL;
2c51f8dd
SH
1510 char *data = NULL;
1511 int ret, s;
1512 bool r;
99978832 1513
443d13f5 1514 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1515 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
1516 return -EIO;
1517 }
1518
99978832 1519 if (offset)
7253e0a4 1520 return 0;
99978832
SH
1521
1522 if (!fc)
1523 return -EIO;
1524
8f6e8f5e 1525 if (!f->controller)
99978832
SH
1526 return -EINVAL;
1527
35482f91 1528 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
2c51f8dd
SH
1529 return -EINVAL;
1530 }
1531 free_key(k);
99978832 1532
99978832 1533
2c51f8dd
SH
1534 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { // should never get here
1535 ret = -EACCES;
1536 goto out;
1537 }
a05660a6 1538
2c51f8dd
SH
1539 if (strcmp(f->file, "tasks") == 0 ||
1540 strcmp(f->file, "/tasks") == 0 ||
1541 strcmp(f->file, "/cgroup.procs") == 0 ||
1542 strcmp(f->file, "cgroup.procs") == 0)
1543 // special case - we have to translate the pids
1544 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
1545 else
35482f91 1546 r = cgfs_get_value(f->controller, f->cgroup, f->file, &data);
99978832 1547
2c51f8dd
SH
1548 if (!r) {
1549 ret = -EINVAL;
1550 goto out;
1551 }
99978832 1552
2c51f8dd
SH
1553 if (!data) {
1554 ret = 0;
1555 goto out;
99978832 1556 }
2c51f8dd
SH
1557 s = strlen(data);
1558 if (s > size)
1559 s = size;
1560 memcpy(buf, data, s);
1561 if (s > 0 && s < size && data[s-1] != '\n')
1562 buf[s++] = '\n';
99978832 1563
2c51f8dd
SH
1564 ret = s;
1565
1566out:
1567 free(data);
1568 return ret;
99978832
SH
1569}
1570
4775fba1
SH
1571static void pid_from_ns(int sock, pid_t tpid)
1572{
1573 pid_t vpid;
1574 struct ucred cred;
1575 char v;
6ee867dc 1576 int ret;
4775fba1
SH
1577
1578 cred.uid = 0;
1579 cred.gid = 0;
6ee867dc 1580 while (1) {
5b2dfd85
SH
1581 if (!wait_for_sock(sock, 2)) {
1582 fprintf(stderr, "%s: timeout reading from parent\n", __func__);
67bd113f 1583 _exit(1);
6ee867dc
SH
1584 }
1585 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
1586 fprintf(stderr, "%s: bad read from parent: %s\n",
1587 __func__, strerror(errno));
67bd113f 1588 _exit(1);
6ee867dc 1589 }
4775fba1 1590 if (vpid == -1) // done
01e71852 1591 break;
4775fba1
SH
1592 v = '0';
1593 cred.pid = vpid;
01e71852 1594 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
4775fba1
SH
1595 v = '1';
1596 cred.pid = getpid();
01e71852 1597 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
67bd113f 1598 _exit(1);
4775fba1
SH
1599 }
1600 }
67bd113f 1601 _exit(0);
4775fba1
SH
1602}
1603
/*
 * pid_from_ns_wrapper: setns into the pid namespace of @tpid, then fork a
 * child (which is the first process actually inside that namespace) to run
 * pid_from_ns() on @sock.
 *
 * The child acknowledges that it is running by writing '1' to a pipe.  If the
 * ack does not arrive within about a second, or is malformed, the child is
 * killed, reaped, and a new one is forked.  This function never returns: it
 * exits 0 when wait_for_pid() reports success for the acknowledged child and
 * exits 1 on failure.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	int newnsfd = -1, ret, cpipe[2];
	char fnam[100];
	pid_t cpid;
	char v;

	ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
	if (ret < 0 || (size_t)ret >= sizeof(fnam))
		_exit(1);
	newnsfd = open(fnam, O_RDONLY);
	if (newnsfd < 0)
		_exit(1);
	if (setns(newnsfd, 0) < 0)
		_exit(1);
	close(newnsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

	for (;;) {
		cpid = fork();
		if (cpid < 0)
			_exit(1);

		if (!cpid) {
			char b = '1';
			close(cpipe[0]);
			if (write(cpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): error on write: %s\n",
					__func__, strerror(errno));
			}
			close(cpipe[1]);
			pid_from_ns(sock, tpid);
			// pid_from_ns() always _exit()s; make sure the child
			// can never fall through into the parent's code
			_exit(1); // not reached
		}

		// give the child 1 second to be done forking and
		// write its ack
		if (wait_for_sock(cpipe[0], 1)) {
			ret = read(cpipe[0], &v, 1);
			if (ret == sizeof(char) && v == '1')
				break;
		}

		// no (valid) ack: get rid of this child and try again.  Both
		// pipe ends stay open in the parent so the next child can
		// reuse them.
		kill(cpid, SIGKILL);
		wait_for_pid(cpid);
	}

	if (!wait_for_pid(cpid))
		_exit(1);
	_exit(0);
}
1659
8ee2a503
SH
/*
 * Given host @uid, look up the uid to which it maps in @pid's user
 * namespace (via /proc/@pid/uid_map and convert_id_to_ns()).
 *
 * Returns true and stores the mapped uid in *@answer on success.  Returns
 * false if the map file cannot be opened or if convert_id_to_ns() yields -1;
 * in the latter case *@answer holds (uid_t)-1.
 */
bool hostuid_to_ns(uid_t uid, pid_t pid, uid_t *answer)
{
	FILE *f;
	char line[400];
	int ret;

	ret = snprintf(line, sizeof(line), "/proc/%d/uid_map", (int)pid);
	if (ret < 0 || (size_t)ret >= sizeof(line))
		return false;

	f = fopen(line, "r");
	if (f == NULL)
		return false;

	*answer = convert_id_to_ns(f, uid);
	fclose(f);

	return *answer != (uid_t)-1;
}
1681
/*
 * get_pid_creds: get the real uid and gid of @pid from
 * /proc/$$/status
 * (XXX should we use euid here?)
 *
 * *@uid and *@gid are set to -1 first; each is overwritten only when the
 * corresponding "Uid:" / "Gid:" line is found and its first field parses.
 * A line that fails to parse stops the scan, leaving later fields at their
 * current values.
 */
void get_pid_creds(pid_t pid, uid_t *uid, gid_t *gid)
{
	char line[400];
	uid_t u;
	gid_t g;
	FILE *f;

	*uid = -1;
	*gid = -1;
	snprintf(line, sizeof(line), "/proc/%d/status", (int)pid);
	if ((f = fopen(line, "r")) == NULL) {
		fprintf(stderr, "Error opening %s: %s\n", line, strerror(errno));
		return;
	}
	while (fgets(line, sizeof(line), f)) {
		if (strncmp(line, "Uid:", 4) == 0) {
			// the first number after the tag is the real uid
			if (sscanf(line+4, "%u", &u) != 1) {
				fprintf(stderr, "bad uid line for pid %d\n", (int)pid);
				fclose(f);
				return;
			}
			*uid = u;
		} else if (strncmp(line, "Gid:", 4) == 0) {
			// the first number after the tag is the real gid
			if (sscanf(line+4, "%u", &g) != 1) {
				fprintf(stderr, "bad gid line for pid %d\n", (int)pid);
				fclose(f);
				return;
			}
			*gid = g;
		}
	}
	fclose(f);
}
1720
/*
 * May the requestor @r move victim @v to a new cgroup?
 * This is allowed if
 *   . they are the same task
 *   . they are owned by the same uid
 *   . @r is root on the host, or
 *   . @v's uid is mapped into @r's where @r is root.
 */
bool may_move_pid(pid_t r, uid_t r_uid, pid_t v)
{
	uid_t v_uid, tmpuid;
	gid_t v_gid;

	if (r == v || r_uid == 0)
		return true;

	get_pid_creds(v, &v_uid, &v_gid);
	if (r_uid == v_uid)
		return true;

	// the requestor must map to uid 0 inside its own user namespace ...
	if (!hostuid_to_ns(r_uid, r, &tmpuid) || tmpuid != 0)
		return false;

	// ... and the victim's uid must have a mapping there as well
	if (hostuid_to_ns(v_uid, r, &tmpuid))
		return true;
	return false;
}
1746
/*
 * Move the pids listed in @buf (as seen from @tpid's pid namespace) into the
 * cgroup (@contrl, @cg).
 *
 * A helper is forked that runs pid_from_ns_wrapper() on one end of a
 * socketpair.  Each pid parsed from @buf is written to the helper, which
 * sends it back as SCM_CREDENTIALS so that recv_creds() yields the pid as
 * seen by this process.  After may_move_pid() approves it for (@tpid, @tuid)
 * that pid is written to the file returned by open_pids_file().
 *
 * Each pid is flushed on its own: if the stream is buffered, several pids
 * would otherwise reach the kernel as one concatenated write.  The first
 * write/flush failure stops processing.
 *
 * Returns true only if no pid was refused or failed to be written and the
 * final fclose() succeeded.
 */
static bool do_write_pids(pid_t tpid, uid_t tuid, const char *contrl, const char *cg,
		const char *file, const char *buf)
{
	int sock[2] = {-1, -1};
	pid_t qpid, cpid = -1;
	FILE *pids_file = NULL;
	bool answer = false, fail = false;
	const char *ptr;

	pids_file = open_pids_file(contrl, cg);
	if (!pids_file)
		return false;

	/*
	 * write the pids to a socket, have helper in writer's pidns
	 * call movepid for us
	 */
	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		perror("socketpair");
		goto out;
	}

	cpid = fork();
	if (cpid == -1)
		goto out;

	if (!cpid) { // child
		fclose(pids_file);
		pid_from_ns_wrapper(sock[1], tpid);
	}

	ptr = buf;
	while (sscanf(ptr, "%d", &qpid) == 1) {
		struct ucred cred;
		char v;

		if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
			fprintf(stderr, "%s: error writing pid to child: %s\n",
				__func__, strerror(errno));
			goto out;
		}

		if (recv_creds(sock[0], &cred, &v)) {
			if (v == '0') {
				if (!may_move_pid(tpid, tuid, cred.pid)) {
					fail = true;
					break;
				}
				// one pid per write: push this pid out before
				// the next one is formatted into the stream
				if (fprintf(pids_file, "%d", (int) cred.pid) < 0 ||
				    fflush(pids_file) != 0) {
					fail = true;
					break;
				}
			}
		}

		ptr = strchr(ptr, '\n');
		if (!ptr)
			break;
		ptr++;
	}

	/* Tell the helper we are done (-1 is its exit marker) */
	qpid = -1;
	if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
		fprintf(stderr, "Warning: failed to ask child to exit\n");

	if (!fail)
		answer = true;

out:
	if (cpid != -1)
		wait_for_pid(cpid);
	if (sock[0] != -1) {
		close(sock[0]);
		close(sock[1]);
	}
	if (pids_file) {
		if (fclose(pids_file) != 0)
			answer = false;
	}
	return answer;
}
1826
2ad6d2bd
SH
1827int cg_write(const char *path, const char *buf, size_t size, off_t offset,
1828 struct fuse_file_info *fi)
1829{
2ad6d2bd 1830 struct fuse_context *fc = fuse_get_context();
2c51f8dd 1831 char *localbuf = NULL;
35482f91 1832 struct cgfs_files *k = NULL;
8f6e8f5e 1833 struct file_info *f = (struct file_info *)fi->fh;
2c51f8dd 1834 bool r;
2ad6d2bd 1835
443d13f5 1836 if (f->type != LXC_TYPE_CGFILE) {
b845ad01
SH
1837 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
1838 return -EIO;
1839 }
1840
2ad6d2bd 1841 if (offset)
7253e0a4 1842 return 0;
2ad6d2bd
SH
1843
1844 if (!fc)
1845 return -EIO;
1846
2c51f8dd 1847 localbuf = alloca(size+1);
47cbf0e5
SH
1848 localbuf[size] = '\0';
1849 memcpy(localbuf, buf, size);
2ad6d2bd 1850
35482f91 1851 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
2c51f8dd
SH
1852 size = -EINVAL;
1853 goto out;
1854 }
2ad6d2bd 1855
2c51f8dd
SH
1856 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) {
1857 size = -EACCES;
1858 goto out;
1859 }
4775fba1 1860
2c51f8dd
SH
1861 if (strcmp(f->file, "tasks") == 0 ||
1862 strcmp(f->file, "/tasks") == 0 ||
1863 strcmp(f->file, "/cgroup.procs") == 0 ||
1864 strcmp(f->file, "cgroup.procs") == 0)
1865 // special case - we have to translate the pids
8ee2a503 1866 r = do_write_pids(fc->pid, fc->uid, f->controller, f->cgroup, f->file, localbuf);
2c51f8dd 1867 else
35482f91 1868 r = cgfs_set_value(f->controller, f->cgroup, f->file, localbuf);
2ad6d2bd 1869
2c51f8dd
SH
1870 if (!r)
1871 size = -EINVAL;
2ad6d2bd 1872
2c51f8dd
SH
1873out:
1874 free_key(k);
1875 return size;
2ad6d2bd
SH
1876}
1877
341b21ad
SH
1878int cg_chown(const char *path, uid_t uid, gid_t gid)
1879{
1880 struct fuse_context *fc = fuse_get_context();
febf2b87 1881 char *cgdir = NULL, *last = NULL, *path1, *path2, *controller;
35482f91 1882 struct cgfs_files *k = NULL;
341b21ad 1883 const char *cgroup;
2c51f8dd 1884 int ret;
341b21ad
SH
1885
1886 if (!fc)
1887 return -EIO;
1888
1889 if (strcmp(path, "/cgroup") == 0)
1890 return -EINVAL;
1891
1892 controller = pick_controller_from_path(fc, path);
1893 if (!controller)
f9a05025 1894 return -EINVAL;
341b21ad
SH
1895 cgroup = find_cgroup_in_path(path);
1896 if (!cgroup)
1897 /* this is just /cgroup/controller */
1898 return -EINVAL;
1899
febf2b87 1900 get_cgdir_and_path(cgroup, &cgdir, &last);
341b21ad 1901
febf2b87 1902 if (!last) {
341b21ad
SH
1903 path1 = "/";
1904 path2 = cgdir;
1905 } else {
1906 path1 = cgdir;
febf2b87 1907 path2 = last;
341b21ad
SH
1908 }
1909
1910 if (is_child_cgroup(controller, path1, path2)) {
1911 // get uid, gid, from '/tasks' file and make up a mode
1912 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
35482f91 1913 k = cgfs_get_key(controller, cgroup, "tasks");
341b21ad
SH
1914
1915 } else
35482f91 1916 k = cgfs_get_key(controller, path1, path2);
341b21ad 1917
2c51f8dd
SH
1918 if (!k) {
1919 ret = -EINVAL;
1920 goto out;
1921 }
341b21ad
SH
1922
1923 /*
1924 * This being a fuse request, the uid and gid must be valid
1925 * in the caller's namespace. So we can just check to make
1926 * sure that the caller is root in his uid, and privileged
1927 * over the file's current owner.
1928 */
2c51f8dd
SH
1929 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) {
1930 ret = -EACCES;
1931 goto out;
1932 }
341b21ad 1933
1f69d62e 1934 ret = cgfs_chown_file(controller, cgroup, uid, gid);
2c51f8dd
SH
1935
1936out:
1937 free_key(k);
1938 free(cgdir);
1939
1940 return ret;
341b21ad 1941}
2ad6d2bd 1942
fd2e4e03
SH
1943int cg_chmod(const char *path, mode_t mode)
1944{
0a1bb5ea 1945 struct fuse_context *fc = fuse_get_context();
febf2b87 1946 char * cgdir = NULL, *last = NULL, *path1, *path2, *controller;
35482f91 1947 struct cgfs_files *k = NULL;
0a1bb5ea 1948 const char *cgroup;
2c51f8dd 1949 int ret;
0a1bb5ea
SH
1950
1951 if (!fc)
1952 return -EIO;
1953
1954 if (strcmp(path, "/cgroup") == 0)
1955 return -EINVAL;
1956
1957 controller = pick_controller_from_path(fc, path);
1958 if (!controller)
f9a05025 1959 return -EINVAL;
0a1bb5ea
SH
1960 cgroup = find_cgroup_in_path(path);
1961 if (!cgroup)
1962 /* this is just /cgroup/controller */
1963 return -EINVAL;
1964
febf2b87 1965 get_cgdir_and_path(cgroup, &cgdir, &last);
0a1bb5ea 1966
febf2b87 1967 if (!last) {
0a1bb5ea
SH
1968 path1 = "/";
1969 path2 = cgdir;
1970 } else {
1971 path1 = cgdir;
febf2b87 1972 path2 = last;
0a1bb5ea
SH
1973 }
1974
1975 if (is_child_cgroup(controller, path1, path2)) {
1976 // get uid, gid, from '/tasks' file and make up a mode
1977 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
35482f91 1978 k = cgfs_get_key(controller, cgroup, "tasks");
0a1bb5ea
SH
1979
1980 } else
35482f91 1981 k = cgfs_get_key(controller, path1, path2);
0a1bb5ea 1982
2c51f8dd
SH
1983 if (!k) {
1984 ret = -EINVAL;
1985 goto out;
1986 }
0a1bb5ea
SH
1987
1988 /*
1989 * This being a fuse request, the uid and gid must be valid
1990 * in the caller's namespace. So we can just check to make
1991 * sure that the caller is root in his uid, and privileged
1992 * over the file's current owner.
1993 */
2c51f8dd
SH
1994 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
1995 ret = -EPERM;
1996 goto out;
1997 }
0a1bb5ea 1998
35482f91 1999 if (!cgfs_chmod_file(controller, cgroup, mode)) {
2c51f8dd
SH
2000 ret = -EINVAL;
2001 goto out;
2002 }
2003
2004 ret = 0;
2005out:
2006 free_key(k);
2007 free(cgdir);
2008 return ret;
fd2e4e03
SH
2009}
2010
ab54b798
SH
2011int cg_mkdir(const char *path, mode_t mode)
2012{
2013 struct fuse_context *fc = fuse_get_context();
febf2b87 2014 char *last = NULL, *path1, *cgdir = NULL, *controller, *next = NULL;
ab54b798 2015 const char *cgroup;
2c51f8dd 2016 int ret;
ab54b798 2017
ab54b798
SH
2018 if (!fc)
2019 return -EIO;
2020
2021
2022 controller = pick_controller_from_path(fc, path);
2023 if (!controller)
f9a05025 2024 return -EINVAL;
ab54b798
SH
2025
2026 cgroup = find_cgroup_in_path(path);
2027 if (!cgroup)
f9a05025 2028 return -EINVAL;
ab54b798 2029
febf2b87
SH
2030 get_cgdir_and_path(cgroup, &cgdir, &last);
2031 if (!last)
ab54b798
SH
2032 path1 = "/";
2033 else
2034 path1 = cgdir;
2035
b11c6ec0 2036 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2037 if (initpid <= 0)
2038 initpid = fc->pid;
0dcc31ea 2039 if (!caller_is_in_ancestor(initpid, controller, path1, &next)) {
87dce5f6
SH
2040 if (!next)
2041 ret = -EINVAL;
2042 else if (last && strcmp(next, last) == 0)
a8b6c3e0
SH
2043 ret = -EEXIST;
2044 else
2045 ret = -ENOENT;
2046 goto out;
2047 }
2048
2c51f8dd
SH
2049 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
2050 ret = -EACCES;
2051 goto out;
2052 }
0dcc31ea 2053 if (!caller_is_in_ancestor(initpid, controller, path1, NULL)) {
2c51f8dd
SH
2054 ret = -EACCES;
2055 goto out;
2056 }
ab54b798 2057
af869b9c 2058 ret = cgfs_create(controller, cgroup, fc->uid, fc->gid);
ab54b798 2059
2c51f8dd
SH
2060out:
2061 free(cgdir);
a8b6c3e0 2062 free(next);
2c51f8dd 2063 return ret;
ab54b798
SH
2064}
2065
50d8d5b5
SH
2066static int cg_rmdir(const char *path)
2067{
2068 struct fuse_context *fc = fuse_get_context();
febf2b87 2069 char *last = NULL, *cgdir = NULL, *controller, *next = NULL;
50d8d5b5 2070 const char *cgroup;
2c51f8dd 2071 int ret;
50d8d5b5
SH
2072
2073 if (!fc)
2074 return -EIO;
2075
50d8d5b5
SH
2076 controller = pick_controller_from_path(fc, path);
2077 if (!controller)
f9a05025 2078 return -EINVAL;
50d8d5b5
SH
2079
2080 cgroup = find_cgroup_in_path(path);
2081 if (!cgroup)
f9a05025 2082 return -EINVAL;
50d8d5b5 2083
febf2b87
SH
2084 get_cgdir_and_path(cgroup, &cgdir, &last);
2085 if (!last) {
2c51f8dd
SH
2086 ret = -EINVAL;
2087 goto out;
2088 }
50d8d5b5 2089
b11c6ec0 2090 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2091 if (initpid <= 0)
2092 initpid = fc->pid;
0dcc31ea 2093 if (!caller_is_in_ancestor(initpid, controller, cgroup, &next)) {
febf2b87 2094 if (!last || strcmp(next, last) == 0)
a8b6c3e0
SH
2095 ret = -EBUSY;
2096 else
2097 ret = -ENOENT;
2098 goto out;
2099 }
2100
2c51f8dd
SH
2101 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
2102 ret = -EACCES;
2103 goto out;
2104 }
0dcc31ea 2105 if (!caller_is_in_ancestor(initpid, controller, cgroup, NULL)) {
2c51f8dd
SH
2106 ret = -EACCES;
2107 goto out;
2108 }
50d8d5b5 2109
35482f91 2110 if (!cgfs_remove(controller, cgroup)) {
2c51f8dd
SH
2111 ret = -EINVAL;
2112 goto out;
2113 }
50d8d5b5 2114
2c51f8dd
SH
2115 ret = 0;
2116
2117out:
2118 free(cgdir);
a8b6c3e0 2119 free(next);
2c51f8dd 2120 return ret;
50d8d5b5
SH
2121}
2122
2dc17609
SH
/* Return true if @line begins with the string @pref. */
static bool startswith(const char *line, const char *pref)
{
	return strncmp(line, pref, strlen(pref)) == 0;
}
2129
/*
 * Scan the newline-separated text @memstat for the first line that starts
 * with "total_cache" and store the number following it, divided by 1024, in
 * *@v.  *@v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	char *cur = memstat;

	*v = 0;
	while (*cur) {
		if (startswith(cur, "total_cache")) {
			// 11 == strlen("total_cache")
			sscanf(cur + 11, "%lu", v);
			*v /= 1024;
			return;
		}

		cur = strchr(cur, '\n');
		if (!cur)
			return;
		cur++;
	}
}
2147
/*
 * Scan the newline-separated text @str for the first line that starts with
 * "<major>:<minor> <iotype>" and store the number following that prefix in
 * *@v.  *@v is 0 when no such line exists.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32] = { 0 };
	char *cur = str;
	size_t len;

	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	len = strlen(key);

	*v = 0;
	while (*cur) {
		if (startswith(cur, key)) {
			sscanf(cur + len, "%lu", v);
			return;
		}

		cur = strchr(cur, '\n');
		if (!cur)
			return;
		cur++;
	}
}
2170
53b43826
SH
2171static int read_file(const char *path, char *buf, size_t size,
2172 struct file_info *d)
2173{
2174 size_t linelen = 0, total_len = 0, rv = 0;
2175 char *line = NULL;
2176 char *cache = d->buf;
2177 size_t cache_size = d->buflen;
2178 FILE *f = fopen(path, "r");
2179 if (!f)
2180 return 0;
2181
2182 while (getline(&line, &linelen, f) != -1) {
2183 size_t l = snprintf(cache, cache_size, "%s", line);
2184 if (l < 0) {
2185 perror("Error writing to cache");
2186 rv = 0;
2187 goto err;
2188 }
2189 if (l >= cache_size) {
2190 fprintf(stderr, "Internal error: truncated write to cache\n");
2191 rv = 0;
2192 goto err;
2193 }
711b4030
SF
2194 cache += l;
2195 cache_size -= l;
2196 total_len += l;
53b43826
SH
2197 }
2198
2199 d->size = total_len;
2200 if (total_len > size ) total_len = size;
2201
2202 /* read from off 0 */
2203 memcpy(buf, d->buf, total_len);
2204 rv = total_len;
2205 err:
2206 fclose(f);
2207 free(line);
2208 return rv;
2209}
2210
758ad80c 2211/*
2ad6d2bd 2212 * FUSE ops for /proc
758ad80c 2213 */
758ad80c 2214
7bc95a75
SH
/*
 * Return the value of memory.limit_in_bytes for @cgroup parsed as a base-10
 * unsigned long, or (unsigned long)-1 if cgfs_get_value() fails.
 */
static unsigned long get_memlimit(const char *cgroup)
{
	char *memlimit_str = NULL;
	unsigned long memlimit;

	if (!cgfs_get_value("memory", cgroup, "memory.limit_in_bytes", &memlimit_str))
		memlimit = -1;
	else
		memlimit = strtoul(memlimit_str, NULL, 10);

	free(memlimit_str);

	return memlimit;
}
2227
/*
 * Return the smallest memory limit that applies to @cgroup: its own
 * get_memlimit() value, lowered by the limit of any ancestor cgroup (walking
 * up with dirname()) that has a smaller one.  Ancestors for which
 * get_memlimit() returns -1 are ignored.
 */
static unsigned long get_min_memlimit(const char *cgroup)
{
	char *copy = strdupa(cgroup);
	unsigned long memlimit = 0, retlimit;

	retlimit = get_memlimit(copy);

	// dirname() bottoms out at "/" for absolute paths but at "." for
	// relative or empty ones; stop on either so the walk always ends
	while (strcmp(copy, "/") != 0 && strcmp(copy, ".") != 0) {
		copy = dirname(copy);
		memlimit = get_memlimit(copy);
		if (memlimit != (unsigned long)-1 && memlimit < retlimit)
			retlimit = memlimit;
	}

	return retlimit;
}
2244
23ce2127
SH
2245static int proc_meminfo_read(char *buf, size_t size, off_t offset,
2246 struct fuse_file_info *fi)
2247{
2dc17609 2248 struct fuse_context *fc = fuse_get_context();
97f1f27b 2249 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd 2250 char *cg;
4622ad78 2251 char *memusage_str = NULL, *memstat_str = NULL,
b731895e
NW
2252 *memswlimit_str = NULL, *memswusage_str = NULL,
2253 *memswlimit_default_str = NULL, *memswusage_default_str = NULL;
4622ad78
TG
2254 unsigned long memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0,
2255 cached = 0, hosttotal = 0;
2dc17609 2256 char *line = NULL;
e1068397 2257 size_t linelen = 0, total_len = 0, rv = 0;
97f1f27b
YY
2258 char *cache = d->buf;
2259 size_t cache_size = d->buflen;
2c51f8dd 2260 FILE *f = NULL;
2dc17609 2261
97f1f27b
YY
2262 if (offset){
2263 if (offset > d->size)
2264 return -EINVAL;
b5ad2d21
SH
2265 if (!d->cached)
2266 return 0;
97f1f27b
YY
2267 int left = d->size - offset;
2268 total_len = left > size ? size: left;
2269 memcpy(buf, cache + offset, total_len);
2270 return total_len;
2271 }
2dc17609 2272
b11c6ec0 2273 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2274 if (initpid <= 0)
2275 initpid = fc->pid;
0dcc31ea 2276 cg = get_pid_cgroup(initpid, "memory");
2dc17609 2277 if (!cg)
53b43826 2278 return read_file("/proc/meminfo", buf, size, d);
2dc17609 2279
7bc95a75 2280 memlimit = get_min_memlimit(cg);
35482f91 2281 if (!cgfs_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
2c51f8dd 2282 goto err;
35482f91 2283 if (!cgfs_get_value("memory", cg, "memory.stat", &memstat_str))
2c51f8dd 2284 goto err;
4622ad78
TG
2285
2286 // Following values are allowed to fail, because swapaccount might be turned
2287 // off for current kernel
2288 if(cgfs_get_value("memory", cg, "memory.memsw.limit_in_bytes", &memswlimit_str) &&
2289 cgfs_get_value("memory", cg, "memory.memsw.usage_in_bytes", &memswusage_str))
2290 {
b731895e
NW
2291 /* If swapaccounting is turned on, then default value is assumed to be that of cgroup / */
2292 if (!cgfs_get_value("memory", "/", "memory.memsw.limit_in_bytes", &memswlimit_default_str))
2293 goto err;
2294 if (!cgfs_get_value("memory", "/", "memory.memsw.usage_in_bytes", &memswusage_default_str))
2295 goto err;
2296
4622ad78
TG
2297 memswlimit = strtoul(memswlimit_str, NULL, 10);
2298 memswusage = strtoul(memswusage_str, NULL, 10);
b731895e
NW
2299
2300 if (!strcmp(memswlimit_str, memswlimit_default_str))
a2de34ba 2301 memswlimit = 0;
b731895e 2302 if (!strcmp(memswusage_str, memswusage_default_str))
a2de34ba
SH
2303 memswusage = 0;
2304
b731895e
NW
2305 memswlimit = memswlimit / 1024;
2306 memswusage = memswusage / 1024;
4622ad78 2307 }
b731895e
NW
2308
2309 memusage = strtoul(memusage_str, NULL, 10);
2310 memlimit /= 1024;
2311 memusage /= 1024;
2312
2dc17609
SH
2313 get_mem_cached(memstat_str, &cached);
2314
2315 f = fopen("/proc/meminfo", "r");
2316 if (!f)
2c51f8dd 2317 goto err;
2dc17609
SH
2318
2319 while (getline(&line, &linelen, f) != -1) {
2320 size_t l;
2321 char *printme, lbuf[100];
2322
2323 memset(lbuf, 0, 100);
2324 if (startswith(line, "MemTotal:")) {
2325 sscanf(line+14, "%lu", &hosttotal);
2326 if (hosttotal < memlimit)
2327 memlimit = hosttotal;
2328 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
2329 printme = lbuf;
2330 } else if (startswith(line, "MemFree:")) {
2331 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
2332 printme = lbuf;
2333 } else if (startswith(line, "MemAvailable:")) {
2334 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
2335 printme = lbuf;
4622ad78
TG
2336 } else if (startswith(line, "SwapTotal:") && memswlimit > 0) {
2337 snprintf(lbuf, 100, "SwapTotal: %8lu kB\n", memswlimit - memlimit);
2338 printme = lbuf;
2339 } else if (startswith(line, "SwapFree:") && memswlimit > 0 && memswusage > 0) {
2340 snprintf(lbuf, 100, "SwapFree: %8lu kB\n",
2341 (memswlimit - memlimit) - (memswusage - memusage));
2342 printme = lbuf;
2dc17609
SH
2343 } else if (startswith(line, "Buffers:")) {
2344 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
2345 printme = lbuf;
2346 } else if (startswith(line, "Cached:")) {
2347 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
2348 printme = lbuf;
2349 } else if (startswith(line, "SwapCached:")) {
2350 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
2351 printme = lbuf;
2352 } else
2353 printme = line;
97f1f27b
YY
2354
2355 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
2356 if (l < 0) {
2357 perror("Error writing to cache");
2358 rv = 0;
2359 goto err;
2360
2361 }
2362 if (l >= cache_size) {
2363 fprintf(stderr, "Internal error: truncated write to cache\n");
2364 rv = 0;
2365 goto err;
2366 }
2367
97f1f27b
YY
2368 cache += l;
2369 cache_size -= l;
2f919d9d 2370 total_len += l;
2dc17609
SH
2371 }
2372
b5ad2d21 2373 d->cached = 1;
97f1f27b
YY
2374 d->size = total_len;
2375 if (total_len > size ) total_len = size;
2376 memcpy(buf, d->buf, total_len);
2377
e1068397 2378 rv = total_len;
2c51f8dd
SH
2379err:
2380 if (f)
2381 fclose(f);
92c84dc4 2382 free(line);
2c51f8dd 2383 free(cg);
2c51f8dd 2384 free(memusage_str);
4622ad78
TG
2385 free(memswlimit_str);
2386 free(memswusage_str);
2c51f8dd 2387 free(memstat_str);
b731895e
NW
2388 free(memswlimit_default_str);
2389 free(memswusage_default_str);
e1068397 2390 return rv;
23ce2127
SH
2391}
2392
/*
 * Fetch the "cpuset.cpus" value of cgroup @cg from the cpuset controller.
 *
 * Returns a newly allocated string which the caller must free, or NULL
 * when cgfs_get_value() reports failure.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgfs_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
2405
bool cpu_in_cpuset(int cpu, const char *cpuset);

/*
 * Parse the cpu number out of a "processor : N" line and hand it to
 * cpu_in_cpuset() together with @cpuset.
 *
 * Returns false when @line does not parse; otherwise returns whatever
 * cpu_in_cpuset() answers for that number.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu_id;
	bool parsed = sscanf(line, "processor : %d", &cpu_id) == 1;

	return parsed && cpu_in_cpuset(cpu_id, cpuset);
}
2416
23ce2127
SH
/*
 * Check whether this is a "^processor" line in /proc/cpuinfo, i.e. one
 * that sscanf can parse as "processor : <number>".
 */
static bool is_processor_line(const char *line)
{
	int ignored;

	return sscanf(line, "processor : %d", &ignored) == 1;
}
2428
23ce2127
SH
2429static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
2430 struct fuse_file_info *fi)
2431{
2432 struct fuse_context *fc = fuse_get_context();
97f1f27b 2433 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2434 char *cg;
2435 char *cpuset = NULL;
23ce2127 2436 char *line = NULL;
e1068397 2437 size_t linelen = 0, total_len = 0, rv = 0;
23ce2127
SH
2438 bool am_printing = false;
2439 int curcpu = -1;
97f1f27b
YY
2440 char *cache = d->buf;
2441 size_t cache_size = d->buflen;
2c51f8dd 2442 FILE *f = NULL;
23ce2127 2443
97f1f27b
YY
2444 if (offset){
2445 if (offset > d->size)
2446 return -EINVAL;
b5ad2d21
SH
2447 if (!d->cached)
2448 return 0;
97f1f27b
YY
2449 int left = d->size - offset;
2450 total_len = left > size ? size: left;
2451 memcpy(buf, cache + offset, total_len);
2f919d9d 2452 return total_len;
97f1f27b 2453 }
23ce2127 2454
b11c6ec0 2455 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2456 if (initpid <= 0)
2457 initpid = fc->pid;
0dcc31ea 2458 cg = get_pid_cgroup(initpid, "cpuset");
23ce2127 2459 if (!cg)
53b43826 2460 return read_file("proc/cpuinfo", buf, size, d);
23ce2127
SH
2461
2462 cpuset = get_cpuset(cg);
2463 if (!cpuset)
2c51f8dd 2464 goto err;
23ce2127
SH
2465
2466 f = fopen("/proc/cpuinfo", "r");
2467 if (!f)
2c51f8dd 2468 goto err;
23ce2127
SH
2469
2470 while (getline(&line, &linelen, f) != -1) {
2471 size_t l;
2472 if (is_processor_line(line)) {
aeb56147 2473 am_printing = cpuline_in_cpuset(line, cpuset);
23ce2127
SH
2474 if (am_printing) {
2475 curcpu ++;
97f1f27b 2476 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
e1068397
MM
2477 if (l < 0) {
2478 perror("Error writing to cache");
2479 rv = 0;
2480 goto err;
2481 }
2482 if (l >= cache_size) {
2483 fprintf(stderr, "Internal error: truncated write to cache\n");
2484 rv = 0;
2485 goto err;
2486 }
711b4030
SF
2487 cache += l;
2488 cache_size -= l;
2489 total_len += l;
23ce2127
SH
2490 }
2491 continue;
2492 }
2493 if (am_printing) {
97f1f27b 2494 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2495 if (l < 0) {
2496 perror("Error writing to cache");
2497 rv = 0;
2498 goto err;
2499 }
2500 if (l >= cache_size) {
2501 fprintf(stderr, "Internal error: truncated write to cache\n");
2502 rv = 0;
2503 goto err;
2504 }
711b4030
SF
2505 cache += l;
2506 cache_size -= l;
2507 total_len += l;
23ce2127
SH
2508 }
2509 }
2510
b5ad2d21 2511 d->cached = 1;
97f1f27b
YY
2512 d->size = total_len;
2513 if (total_len > size ) total_len = size;
2514
2515 /* read from off 0 */
2516 memcpy(buf, d->buf, total_len);
e1068397 2517 rv = total_len;
2c51f8dd
SH
2518err:
2519 if (f)
2520 fclose(f);
92c84dc4 2521 free(line);
2c51f8dd
SH
2522 free(cpuset);
2523 free(cg);
e1068397 2524 return rv;
23ce2127
SH
2525}
2526
2527static int proc_stat_read(char *buf, size_t size, off_t offset,
2528 struct fuse_file_info *fi)
2529{
aeb56147 2530 struct fuse_context *fc = fuse_get_context();
97f1f27b 2531 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2532 char *cg;
2533 char *cpuset = NULL;
aeb56147 2534 char *line = NULL;
e1068397 2535 size_t linelen = 0, total_len = 0, rv = 0;
2a0fde62 2536 int curcpu = -1; /* cpu numbering starts at 0 */
97f1f27b
YY
2537 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
2538 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
2539 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
2540#define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2541 char cpuall[CPUALL_MAX_SIZE];
2542 /* reserve for cpu all */
2543 char *cache = d->buf + CPUALL_MAX_SIZE;
2544 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
2c51f8dd 2545 FILE *f = NULL;
aeb56147 2546
97f1f27b
YY
2547 if (offset){
2548 if (offset > d->size)
2549 return -EINVAL;
b5ad2d21
SH
2550 if (!d->cached)
2551 return 0;
97f1f27b
YY
2552 int left = d->size - offset;
2553 total_len = left > size ? size: left;
2554 memcpy(buf, d->buf + offset, total_len);
2f919d9d 2555 return total_len;
97f1f27b 2556 }
aeb56147 2557
b11c6ec0 2558 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2559 if (initpid <= 0)
2560 initpid = fc->pid;
0dcc31ea 2561 cg = get_pid_cgroup(initpid, "cpuset");
aeb56147 2562 if (!cg)
53b43826 2563 return read_file("/proc/stat", buf, size, d);
aeb56147
SH
2564
2565 cpuset = get_cpuset(cg);
2566 if (!cpuset)
2c51f8dd 2567 goto err;
aeb56147
SH
2568
2569 f = fopen("/proc/stat", "r");
2570 if (!f)
2c51f8dd 2571 goto err;
aeb56147 2572
97f1f27b
YY
2573 //skip first line
2574 if (getline(&line, &linelen, f) < 0) {
2575 fprintf(stderr, "proc_stat_read read first line failed\n");
2c51f8dd 2576 goto err;
97f1f27b
YY
2577 }
2578
aeb56147
SH
2579 while (getline(&line, &linelen, f) != -1) {
2580 size_t l;
2581 int cpu;
2a0fde62 2582 char cpu_char[10]; /* That's a lot of cores */
aeb56147
SH
2583 char *c;
2584
2a0fde62
CB
2585 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
2586 /* not a ^cpuN line containing a number N, just print it */
97f1f27b 2587 l = snprintf(cache, cache_size, "%s", line);
e1068397
MM
2588 if (l < 0) {
2589 perror("Error writing to cache");
2590 rv = 0;
2591 goto err;
2592 }
2593 if (l >= cache_size) {
2594 fprintf(stderr, "Internal error: truncated write to cache\n");
2595 rv = 0;
2596 goto err;
2597 }
711b4030
SF
2598 cache += l;
2599 cache_size -= l;
2600 total_len += l;
2601 continue;
aeb56147 2602 }
2a0fde62
CB
2603
2604 if (sscanf(cpu_char, "%d", &cpu) != 1)
2605 continue;
aeb56147
SH
2606 if (!cpu_in_cpuset(cpu, cpuset))
2607 continue;
2608 curcpu ++;
2609
2610 c = strchr(line, ' ');
2611 if (!c)
2612 continue;
25c5e8fb 2613 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
e1068397
MM
2614 if (l < 0) {
2615 perror("Error writing to cache");
2616 rv = 0;
2617 goto err;
2618
2619 }
2620 if (l >= cache_size) {
2621 fprintf(stderr, "Internal error: truncated write to cache\n");
2622 rv = 0;
2623 goto err;
2624 }
2625
97f1f27b
YY
2626 cache += l;
2627 cache_size -= l;
aeb56147 2628 total_len += l;
2f919d9d 2629
97f1f27b
YY
2630 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
2631 &softirq, &steal, &guest) != 9)
2632 continue;
2633 user_sum += user;
2634 nice_sum += nice;
2635 system_sum += system;
2636 idle_sum += idle;
2637 iowait_sum += iowait;
2638 irq_sum += irq;
2639 softirq_sum += softirq;
2640 steal_sum += steal;
2f919d9d 2641 guest_sum += guest;
97f1f27b
YY
2642 }
2643
2644 cache = d->buf;
2645
2f919d9d 2646 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
97f1f27b
YY
2647 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
2648 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
2649 memcpy(cache, cpuall, cpuall_len);
2f919d9d 2650 cache += cpuall_len;
2c51f8dd 2651 } else{
97f1f27b
YY
2652 /* shouldn't happen */
2653 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
2654 cpuall_len = 0;
2655 }
2656
2657 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
2658 total_len += cpuall_len;
b5ad2d21 2659 d->cached = 1;
97f1f27b
YY
2660 d->size = total_len;
2661 if (total_len > size ) total_len = size;
2662
2663 memcpy(buf, d->buf, total_len);
e1068397 2664 rv = total_len;
2c51f8dd
SH
2665
2666err:
2667 if (f)
2668 fclose(f);
92c84dc4 2669 free(line);
2c51f8dd
SH
2670 free(cpuset);
2671 free(cg);
e1068397 2672 return rv;
23ce2127
SH
2673}
2674
/*
 * Compute how long ago the ctime of /proc/<initpid> was, where <initpid>
 * is what lookup_initpid_in_store() returns for @pid.
 *
 * Returns time(NULL) minus that st_ctime, or 0 when the init pid cannot
 * be determined, the path does not fit, or lstat() fails.
 */
static long int getreaperage(pid_t pid)
{
	char procdir[100];
	struct stat st;
	pid_t reaper = lookup_initpid_in_store(pid);
	int n;

	if (reaper <= 0)
		return 0;

	n = snprintf(procdir, 100, "/proc/%d", reaper);
	if (n < 0 || n >= 100 || lstat(procdir, &st) < 0)
		return 0;

	return time(NULL) - st.st_ctime;
}
2695
0b6af11b
SH
/*
 * Read cpuacct.usage for the cpuacct cgroup of @task's init pid and
 * return it divided by 10^9 (the kernel's cgroup-v1 documentation gives
 * cpuacct.usage in nanoseconds, so this yields whole seconds).
 *
 * Returns 0 when the init pid, the cgroup or the value is unavailable.
 */
static unsigned long get_reaper_busy(pid_t task)
{
	char *cg = NULL, *raw = NULL;
	unsigned long secs = 0;
	pid_t reaper = lookup_initpid_in_store(task);

	if (reaper <= 0)
		return 0;

	cg = get_pid_cgroup(reaper, "cpuacct");
	if (cg && cgfs_get_value("cpuacct", cg, "cpuacct.usage", &raw))
		secs = strtoul(raw, NULL, 10) / 1000000000;

	free(cg);
	free(raw);
	return secs;
}
2718
2719/*
2720 * We read /proc/uptime and reuse its second field.
2721 * For the first field, we use the mtime for the reaper for
2722 * the calling pid as returned by getreaperage
2723 */
23ce2127
SH
2724static int proc_uptime_read(char *buf, size_t size, off_t offset,
2725 struct fuse_file_info *fi)
2726{
41bb9357 2727 struct fuse_context *fc = fuse_get_context();
97f1f27b 2728 struct file_info *d = (struct file_info *)fi->fh;
ff96a5f9 2729 long int reaperage = getreaperage(fc->pid);
0b6af11b 2730 unsigned long int busytime = get_reaper_busy(fc->pid), idletime;
b5ad2d21 2731 char *cache = d->buf;
97f1f27b 2732 size_t total_len = 0;
41bb9357 2733
97f1f27b
YY
2734 if (offset){
2735 if (offset > d->size)
2736 return -EINVAL;
b5ad2d21
SH
2737 if (!d->cached)
2738 return 0;
2739 int left = d->size - offset;
2740 total_len = left > size ? size: left;
2741 memcpy(buf, cache + offset, total_len);
2742 return total_len;
97f1f27b
YY
2743 }
2744
0b6af11b 2745 idletime = reaperage - busytime;
f6c0b279
SH
2746 if (idletime > reaperage)
2747 idletime = reaperage;
2748
b5ad2d21 2749 total_len = snprintf(d->buf, d->size, "%ld.0 %lu.0\n", reaperage, idletime);
e1068397
MM
2750 if (total_len < 0){
2751 perror("Error writing to cache");
2752 return 0;
2753 }
cdcdb29b 2754
b5ad2d21
SH
2755 d->size = (int)total_len;
2756 d->cached = 1;
2757
2758 if (total_len > size) total_len = size;
2759
2760 memcpy(buf, d->buf, total_len);
97f1f27b 2761 return total_len;
23ce2127
SH
2762}
2763
49878439
YY
2764static int proc_diskstats_read(char *buf, size_t size, off_t offset,
2765 struct fuse_file_info *fi)
2766{
2767 char dev_name[72];
2768 struct fuse_context *fc = fuse_get_context();
97f1f27b 2769 struct file_info *d = (struct file_info *)fi->fh;
2c51f8dd
SH
2770 char *cg;
2771 char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
49878439
YY
2772 *io_wait_time_str = NULL, *io_service_time_str = NULL;
2773 unsigned long read = 0, write = 0;
2774 unsigned long read_merged = 0, write_merged = 0;
2775 unsigned long read_sectors = 0, write_sectors = 0;
2776 unsigned long read_ticks = 0, write_ticks = 0;
2777 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
2778 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
b5ad2d21
SH
2779 char *cache = d->buf;
2780 size_t cache_size = d->buflen;
49878439 2781 char *line = NULL;
e1068397 2782 size_t linelen = 0, total_len = 0, rv = 0;
49878439
YY
2783 unsigned int major = 0, minor = 0;
2784 int i = 0;
2c51f8dd 2785 FILE *f = NULL;
49878439 2786
97f1f27b
YY
2787 if (offset){
2788 if (offset > d->size)
2789 return -EINVAL;
b5ad2d21
SH
2790 if (!d->cached)
2791 return 0;
2792 int left = d->size - offset;
2793 total_len = left > size ? size: left;
2794 memcpy(buf, cache + offset, total_len);
2795 return total_len;
97f1f27b 2796 }
49878439 2797
b11c6ec0 2798 pid_t initpid = lookup_initpid_in_store(fc->pid);
87dce5f6
SH
2799 if (initpid <= 0)
2800 initpid = fc->pid;
0dcc31ea 2801 cg = get_pid_cgroup(initpid, "blkio");
49878439 2802 if (!cg)
53b43826 2803 return read_file("/proc/diskstats", buf, size, d);
49878439 2804
35482f91 2805 if (!cgfs_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
2c51f8dd 2806 goto err;
35482f91 2807 if (!cgfs_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
2c51f8dd 2808 goto err;
35482f91 2809 if (!cgfs_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
2c51f8dd 2810 goto err;
35482f91 2811 if (!cgfs_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
2c51f8dd 2812 goto err;
35482f91 2813 if (!cgfs_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
2c51f8dd 2814 goto err;
49878439
YY
2815
2816
2817 f = fopen("/proc/diskstats", "r");
2818 if (!f)
2c51f8dd 2819 goto err;
49878439
YY
2820
2821 while (getline(&line, &linelen, f) != -1) {
2822 size_t l;
2823 char *printme, lbuf[256];
2824
c0adec85 2825 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
49878439
YY
2826 if(i == 3){
2827 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
2828 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
2829 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
2830 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
2831 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
2832 read_sectors = read_sectors/512;
2833 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
2834 write_sectors = write_sectors/512;
2f919d9d 2835
49878439
YY
2836 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
2837 rd_svctm = rd_svctm/1000000;
2838 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
2839 rd_wait = rd_wait/1000000;
2840 read_ticks = rd_svctm + rd_wait;
2841
2842 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
2843 wr_svctm = wr_svctm/1000000;
2844 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
2845 wr_wait = wr_wait/1000000;
2846 write_ticks = wr_svctm + wr_wait;
2847
2848 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
2849 tot_ticks = tot_ticks/1000000;
2850 }else{
2851 continue;
2852 }
2853
2854 memset(lbuf, 0, 256);
2855 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
2f919d9d 2856 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
49878439
YY
2857 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
2858 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
2859 printme = lbuf;
2860 } else
2861 continue;
2862
b5ad2d21 2863 l = snprintf(cache, cache_size, "%s", printme);
e1068397
MM
2864 if (l < 0) {
2865 perror("Error writing to fuse buf");
2866 rv = 0;
2867 goto err;
2868 }
b5ad2d21 2869 if (l >= cache_size) {
e1068397
MM
2870 fprintf(stderr, "Internal error: truncated write to cache\n");
2871 rv = 0;
2872 goto err;
2873 }
b5ad2d21
SH
2874 cache += l;
2875 cache_size -= l;
49878439
YY
2876 total_len += l;
2877 }
2878
b5ad2d21 2879 d->cached = 1;
97f1f27b 2880 d->size = total_len;
b5ad2d21
SH
2881 if (total_len > size ) total_len = size;
2882 memcpy(buf, d->buf, total_len);
2883
e1068397 2884 rv = total_len;
2c51f8dd
SH
2885err:
2886 free(cg);
2887 if (f)
2888 fclose(f);
49878439 2889 free(line);
2c51f8dd
SH
2890 free(io_serviced_str);
2891 free(io_merged_str);
2892 free(io_service_bytes_str);
2893 free(io_wait_time_str);
2894 free(io_service_time_str);
e1068397 2895 return rv;
49878439
YY
2896}
2897
23ce2127
SH
/*
 * Measure a file by reading it line by line and adding up the line
 * lengths reported by getline().
 *
 * Returns the total number of bytes read, or 0 when @which cannot be
 * opened.
 */
static off_t get_procfile_size(const char *which)
{
	FILE *fp = fopen(which, "r");
	char *linebuf = NULL;
	size_t cap = 0;
	ssize_t total = 0;

	if (!fp)
		return 0;

	for (;;) {
		ssize_t got = getline(&linebuf, &cap, fp);

		if (got == -1)
			break;
		total += got;
	}

	fclose (fp);
	free(linebuf);

	return total;
}
2914
758ad80c
SH
/*
 * getattr handler for the /proc subtree.
 *
 * @path: path inside the filesystem
 * @sb:   stat buffer to fill; it is zeroed first
 *
 * "/proc" is reported as a read-only directory and the five virtual files
 * as read-only regular files of size 0.  All timestamps are set to the
 * current CLOCK_REALTIME and the owner is uid/gid 0.
 *
 * Returns 0 on success, -EINVAL when the clock cannot be read and
 * -ENOENT for any other path.
 */
static int proc_getattr(const char *path, struct stat *sb)
{
	static const char *const regular_files[] = {
		"/proc/meminfo",
		"/proc/cpuinfo",
		"/proc/uptime",
		"/proc/stat",
		"/proc/diskstats",
	};
	struct timespec now;
	size_t idx;

	memset(sb, 0, sizeof(struct stat));
	if (clock_gettime(CLOCK_REALTIME, &now) < 0)
		return -EINVAL;

	sb->st_uid = 0;
	sb->st_gid = 0;
	sb->st_atim = now;
	sb->st_mtim = now;
	sb->st_ctim = now;

	if (!strcmp(path, "/proc")) {
		sb->st_mode = S_IFDIR | 00555;
		sb->st_nlink = 2;
		return 0;
	}

	for (idx = 0; idx < sizeof(regular_files) / sizeof(regular_files[0]); idx++) {
		if (strcmp(path, regular_files[idx]) != 0)
			continue;
		sb->st_size = 0;
		sb->st_mode = S_IFREG | 00444;
		sb->st_nlink = 1;
		return 0;
	}

	return -ENOENT;
}
2942
2943static int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
2944 struct fuse_file_info *fi)
2945{
2946 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
2947 filler(buf, "meminfo", NULL, 0) != 0 ||
2948 filler(buf, "stat", NULL, 0) != 0 ||
49878439
YY
2949 filler(buf, "uptime", NULL, 0) != 0 ||
2950 filler(buf, "diskstats", NULL, 0) != 0)
758ad80c 2951 return -EINVAL;
758ad80c
SH
2952 return 0;
2953}
2954
35629743
SH
2955static int proc_open(const char *path, struct fuse_file_info *fi)
2956{
96fc5ee6
SH
2957 int type = -1;
2958 struct file_info *info;
2959
2960 if (strcmp(path, "/proc/meminfo") == 0)
2961 type = LXC_TYPE_PROC_MEMINFO;
2962 else if (strcmp(path, "/proc/cpuinfo") == 0)
2963 type = LXC_TYPE_PROC_CPUINFO;
2964 else if (strcmp(path, "/proc/uptime") == 0)
2965 type = LXC_TYPE_PROC_UPTIME;
2966 else if (strcmp(path, "/proc/stat") == 0)
2967 type = LXC_TYPE_PROC_STAT;
2968 else if (strcmp(path, "/proc/diskstats") == 0)
2969 type = LXC_TYPE_PROC_DISKSTATS;
2970 if (type == -1)
2971 return -ENOENT;
2972
2c51f8dd
SH
2973 info = malloc(sizeof(*info));
2974 if (!info)
2975 return -ENOMEM;
2976
96fc5ee6
SH
2977 memset(info, 0, sizeof(*info));
2978 info->type = type;
2979
97f1f27b 2980 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
2c51f8dd
SH
2981 do {
2982 info->buf = malloc(info->buflen);
2983 } while (!info->buf);
97f1f27b
YY
2984 memset(info->buf, 0, info->buflen);
2985 /* set actual size to buffer size */
2f919d9d 2986 info->size = info->buflen;
97f1f27b 2987
96fc5ee6
SH
2988 fi->fh = (unsigned long)info;
2989 return 0;
2990}
2991
2992static int proc_release(const char *path, struct fuse_file_info *fi)
2993{
2994 struct file_info *f = (struct file_info *)fi->fh;
2995
2996 do_release_file_info(f);
2997 return 0;
35629743
SH
2998}
2999
35629743
SH
3000static int proc_read(const char *path, char *buf, size_t size, off_t offset,
3001 struct fuse_file_info *fi)
3002{
96fc5ee6
SH
3003 struct file_info *f = (struct file_info *) fi->fh;
3004
3005 switch (f->type) {
2f919d9d 3006 case LXC_TYPE_PROC_MEMINFO:
23ce2127 3007 return proc_meminfo_read(buf, size, offset, fi);
96fc5ee6 3008 case LXC_TYPE_PROC_CPUINFO:
23ce2127 3009 return proc_cpuinfo_read(buf, size, offset, fi);
96fc5ee6 3010 case LXC_TYPE_PROC_UPTIME:
23ce2127 3011 return proc_uptime_read(buf, size, offset, fi);
96fc5ee6 3012 case LXC_TYPE_PROC_STAT:
23ce2127 3013 return proc_stat_read(buf, size, offset, fi);
96fc5ee6 3014 case LXC_TYPE_PROC_DISKSTATS:
49878439 3015 return proc_diskstats_read(buf, size, offset, fi);
96fc5ee6
SH
3016 default:
3017 return -EINVAL;
3018 }
35629743
SH
3019}
3020
2ad6d2bd
SH
3021/*
3022 * FUSE ops for /
3023 * these just delegate to the /proc and /cgroup ops as
3024 * needed
3025 */
758ad80c
SH
3026
3027static int lxcfs_getattr(const char *path, struct stat *sb)
3028{
3029 if (strcmp(path, "/") == 0) {
3030 sb->st_mode = S_IFDIR | 00755;
3031 sb->st_nlink = 2;
3032 return 0;
3033 }
3034 if (strncmp(path, "/cgroup", 7) == 0) {
3035 return cg_getattr(path, sb);
3036 }
35629743 3037 if (strncmp(path, "/proc", 5) == 0) {
758ad80c
SH
3038 return proc_getattr(path, sb);
3039 }
3040 return -EINVAL;
3041}
3042
/*
 * Top-level opendir: "/" and "/proc" need no state and succeed directly;
 * paths starting with "/cgroup" are delegated to cg_opendir().
 * Returns -ENOENT for anything else.
 */
static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
{
	if (!strcmp(path, "/"))
		return 0;

	if (!strncmp(path, "/cgroup", 7))
		return cg_opendir(path, fi);

	return strcmp(path, "/proc") == 0 ? 0 : -ENOENT;
}
3055
3056static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
3057 struct fuse_file_info *fi)
3058{
3059 if (strcmp(path, "/") == 0) {
3060 if (filler(buf, "proc", NULL, 0) != 0 ||
3061 filler(buf, "cgroup", NULL, 0) != 0)
3062 return -EINVAL;
3063 return 0;
3064 }
35629743 3065 if (strncmp(path, "/cgroup", 7) == 0)
758ad80c 3066 return cg_readdir(path, buf, filler, offset, fi);
35629743
SH
3067 if (strcmp(path, "/proc") == 0)
3068 return proc_readdir(path, buf, filler, offset, fi);
758ad80c
SH
3069 return -EINVAL;
3070}
3071
/*
 * Top-level releasedir: nothing to release for "/" and "/proc"; paths
 * starting with "/cgroup" are delegated to cg_releasedir().
 * Returns -EINVAL for anything else.
 */
static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
{
	if (!strcmp(path, "/"))
		return 0;

	if (!strncmp(path, "/cgroup", 7))
		return cg_releasedir(path, fi);

	return strcmp(path, "/proc") == 0 ? 0 : -EINVAL;
}
3083
99978832
SH
/*
 * Top-level open: routes by path prefix to cg_open() or proc_open().
 * Returns -EINVAL when the path belongs to neither subtree.
 */
static int lxcfs_open(const char *path, struct fuse_file_info *fi)
{
	if (!strncmp(path, "/cgroup", 7))
		return cg_open(path, fi);

	if (!strncmp(path, "/proc", 5))
		return proc_open(path, fi);

	return -EINVAL;
}
3093
/*
 * Top-level read: routes by path prefix to cg_read() or proc_read().
 * Returns -EINVAL when the path belongs to neither subtree.
 */
static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset,
		struct fuse_file_info *fi)
{
	if (!strncmp(path, "/cgroup", 7))
		return cg_read(path, buf, size, offset, fi);

	if (!strncmp(path, "/proc", 5))
		return proc_read(path, buf, size, offset, fi);

	return -EINVAL;
}
3104
2ad6d2bd
SH
/*
 * Top-level write: only paths starting with "/cgroup" are writable and
 * are delegated to cg_write(); everything else yields -EINVAL.
 */
int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset,
	     struct fuse_file_info *fi)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_write(path, buf, size, offset, fi);
}
3114
99978832
SH
/*
 * flush handler: there is nothing to flush, so this always succeeds.
 */
static int lxcfs_flush(const char *path, struct fuse_file_info *fi)
{
	(void)path;
	(void)fi;

	return 0;
}
3119
/*
 * Top-level release: routes by path prefix to cg_release() or
 * proc_release().  Returns -EINVAL when the path belongs to neither
 * subtree.
 */
static int lxcfs_release(const char *path, struct fuse_file_info *fi)
{
	if (!strncmp(path, "/cgroup", 7))
		return cg_release(path, fi);

	if (!strncmp(path, "/proc", 5))
		return proc_release(path, fi);

	return -EINVAL;
}
3129
/*
 * fsync handler: there is nothing to synchronize, so this always succeeds.
 */
static int lxcfs_fsync(const char *path, int datasync, struct fuse_file_info *fi)
{
	(void)path;
	(void)datasync;
	(void)fi;

	return 0;
}
3134
ab54b798
SH
/*
 * mkdir handler: only supported below "/cgroup", where it is delegated to
 * cg_mkdir(); everything else yields -EINVAL.
 */
int lxcfs_mkdir(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_mkdir(path, mode);
}
3142
341b21ad
SH
/*
 * chown handler: only supported below "/cgroup", where it is delegated to
 * cg_chown(); everything else yields -EINVAL.
 */
int lxcfs_chown(const char *path, uid_t uid, gid_t gid)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chown(path, uid, gid);
}
3150
2ad6d2bd
SH
/*
 * cat first does a truncate before doing ops->write.  This doesn't
 * really make sense for cgroups, so for paths starting with "/cgroup"
 * we report success without doing anything.  Any other path yields
 * -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	(void)newsize;

	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
3162
50d8d5b5
SH
/*
 * rmdir handler: only supported below "/cgroup", where it is delegated to
 * cg_rmdir(); everything else yields -EINVAL.
 */
int lxcfs_rmdir(const char *path)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_rmdir(path);
}
3169
fd2e4e03
SH
/*
 * chmod handler: only supported below "/cgroup", where it is delegated to
 * cg_chmod(); everything else yields -EINVAL.
 */
int lxcfs_chmod(const char *path, mode_t mode)
{
	if (strncmp(path, "/cgroup", 7) != 0)
		return -EINVAL;

	return cg_chmod(path, mode);
}
3176
758ad80c
SH
3177const struct fuse_operations lxcfs_ops = {
3178 .getattr = lxcfs_getattr,
3179 .readlink = NULL,
3180 .getdir = NULL,
3181 .mknod = NULL,
ab54b798 3182 .mkdir = lxcfs_mkdir,
758ad80c 3183 .unlink = NULL,
50d8d5b5 3184 .rmdir = lxcfs_rmdir,
758ad80c
SH
3185 .symlink = NULL,
3186 .rename = NULL,
3187 .link = NULL,
fd2e4e03 3188 .chmod = lxcfs_chmod,
341b21ad 3189 .chown = lxcfs_chown,
2ad6d2bd 3190 .truncate = lxcfs_truncate,
758ad80c 3191 .utime = NULL,
99978832
SH
3192
3193 .open = lxcfs_open,
3194 .read = lxcfs_read,
3195 .release = lxcfs_release,
2ad6d2bd 3196 .write = lxcfs_write,
99978832 3197
758ad80c 3198 .statfs = NULL,
99978832
SH
3199 .flush = lxcfs_flush,
3200 .fsync = lxcfs_fsync,
758ad80c
SH
3201
3202 .setxattr = NULL,
3203 .getxattr = NULL,
3204 .listxattr = NULL,
3205 .removexattr = NULL,
3206
3207 .opendir = lxcfs_opendir,
3208 .readdir = lxcfs_readdir,
3209 .releasedir = lxcfs_releasedir,
3210
3211 .fsyncdir = NULL,
3212 .init = NULL,
3213 .destroy = NULL,
3214 .access = NULL,
3215 .create = NULL,
3216 .ftruncate = NULL,
3217 .fgetattr = NULL,
3218};
3219
/*
 * Print the command-line synopsis for program name @me to stderr and
 * terminate the process with exit status 1.
 */
static void usage(const char *me)
{
	fprintf(stderr,
		"Usage:\n"
		"\n"
		"%s mountpoint\n"
		"%s -h\n",
		me, me);
	exit(1);
}
3228
/*
 * Report whether @w is one of the accepted spellings of the help flag:
 * "-h", "--help", "-help" or "help".
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = { "-h", "--help", "-help", "help" };
	size_t idx;

	for (idx = 0; idx < sizeof(spellings) / sizeof(spellings[0]); idx++) {
		if (!strcmp(w, spellings[idx]))
			return true;
	}
	return false;
}
3238
0b0f73db
SH
/*
 * Remove the first occurrence of @which from the NULL-terminated @argv
 * (searching from index 1) by shifting the following entries, including
 * the terminator, one slot to the left, and decrement *@argcp.
 * Does nothing when @which is not present.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	int pos = 1;

	while (argv[pos] && strcmp(argv[pos], which) != 0)
		pos++;
	if (!argv[pos])
		return;

	/* copy until the NULL terminator itself has been moved down */
	do {
		argv[pos] = argv[pos + 1];
	} while (argv[pos++]);

	(*argcp)--;
}
3253
/*
 * Remove the first "@opt @v" pair from the NULL-terminated @argv
 * (searching from index 1) by shifting the following entries, including
 * the terminator, two slots to the left, and subtract 2 from *@argcp.
 *
 * If @opt is found but followed by a value other than @v, a diagnostic
 * naming the value that was actually found is printed to stderr and the
 * process exits with status 1.  Does nothing when @opt is not present or
 * is the last argument.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	for (i = 1; argv[i]; i++) {
		/* an option needs a value after it; the last entry cannot match */
		if (!argv[i+1])
			continue;
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i+1], v) != 0) {
			/* report the offending value, not the one we expected */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i+1]);
			exit(1);
		}
		for (; argv[i+1]; i++) {
			argv[i] = argv[i+2];
		}
		(*argcp) -= 2;
		return;
	}
}
3274
758ad80c
SH
3275int main(int argc, char *argv[])
3276{
c0adec85 3277 int ret = -1;
0b0f73db
SH
3278 /*
3279 * what we pass to fuse_main is:
3280 * argv[0] -s -f -o allow_other,directio argv[1] NULL
3281 */
2c51f8dd
SH
3282 int nargs = 5, cnt = 0;
3283 char *newargv[6];
758ad80c 3284
977ac879 3285#ifdef FORTRAVIS
df062bcb
SH
3286 /* for travis which runs on 12.04 */
3287 if (glib_check_version (2, 36, 0) != NULL)
3288 g_type_init ();
977ac879 3289#endif
df062bcb 3290
0b0f73db
SH
3291 /* accomodate older init scripts */
3292 swallow_arg(&argc, argv, "-s");
3293 swallow_arg(&argc, argv, "-f");
3294 swallow_option(&argc, argv, "-o", "allow_other");
3295
2e9c0b32
SH
3296 if (argc == 2 && strcmp(argv[1], "--version") == 0) {
3297 fprintf(stderr, "%s\n", VERSION);
3298 exit(0);
3299 }
0b0f73db 3300 if (argc != 2 || is_help(argv[1]))
758ad80c
SH
3301 usage(argv[0]);
3302
38a76a91 3303 newargv[cnt++] = argv[0];
38a76a91
SH
3304 newargv[cnt++] = "-f";
3305 newargv[cnt++] = "-o";
f466a31e 3306 newargv[cnt++] = "allow_other,direct_io,entry_timeout=0.5,attr_timeout=0.5";
38a76a91
SH
3307 newargv[cnt++] = argv[1];
3308 newargv[cnt++] = NULL;
758ad80c 3309
35482f91 3310 if (!cgfs_setup_controllers())
c0adec85 3311 goto out;
758ad80c 3312
35482f91 3313 ret = fuse_main(nargs, newargv, &lxcfs_ops, NULL);
758ad80c 3314
c0adec85 3315out:
758ad80c 3316 return ret;
2183082c 3317}