]> git.proxmox.com Git - mirror_lxcfs.git/blame - bindings.c
configure.ac: release 2.0.0.beta1
[mirror_lxcfs.git] / bindings.c
CommitLineData
237e200e
SH
1/* lxcfs
2 *
3 * Copyright © 2014-2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
5 *
6 * See COPYING file for details.
7 */
8
9#define FUSE_USE_VERSION 26
10
11#include <stdio.h>
12#include <dirent.h>
13#include <fcntl.h>
14#include <fuse.h>
15#include <unistd.h>
16#include <errno.h>
17#include <stdbool.h>
18#include <time.h>
19#include <string.h>
20#include <stdlib.h>
21#include <libgen.h>
22#include <sched.h>
23#include <pthread.h>
24#include <linux/sched.h>
25#include <sys/param.h>
26#include <sys/socket.h>
27#include <sys/mount.h>
28#include <sys/epoll.h>
29#include <wait.h>
30
237e200e
SH
31#include "bindings.h"
32
33#include "config.h" // for VERSION
34
/*
 * Identifiers for the kinds of virtual file served: cgroup directories
 * and files, plus the emulated /proc files.
 */
enum {
	LXC_TYPE_CGDIR,
	LXC_TYPE_CGFILE,
	LXC_TYPE_PROC_MEMINFO,
	LXC_TYPE_PROC_CPUINFO,
	LXC_TYPE_PROC_UPTIME,
	LXC_TYPE_PROC_STAT,
	LXC_TYPE_PROC_DISKSTATS,
	LXC_TYPE_PROC_SWAPS,
};
45
/*
 * State attached to an open lxcfs file.
 * NOTE(review): how the fields are filled in is not visible in this part
 * of the file; the comments below only restate what the names say.
 */
struct file_info {
	char *controller;
	char *cgroup;
	char *file;
	int type;	/* presumably one of the LXC_TYPE_* values - confirm */
	char *buf;	// unused as of yet
	int buflen;
	int size;	// actual data size
	int cached;
};
56
/* reserve buffer size, for cpuall in /proc/stat */
#define BUF_RESERVE_SIZE 256
59
/*
 * A table caching which pid is init for a pid namespace.
 * When looking up which pid is init for $qpid, we
 * 1. Stat /proc/$qpid/ns/pid.
 * 2. Check whether the ino_t is in our store.
 *   a. If not, fork a child in qpid's ns to send us
 *      ucred.pid = 1, and read the initpid.  Cache
 *      initpid and creation time for /proc/initpid
 *      in a new store entry.
 *   b. If so, verify that /proc/initpid still matches
 *      what we have saved.  If not, clear the store
 *      entry and go back to a.  If so, return the
 *      cached initpid.
 */
struct pidns_init_store {
	ino_t ino;          // inode number for /proc/$pid/ns/pid
	pid_t initpid;      // the pid of init in that ns
	long int ctime;     // the time at which /proc/$initpid was created
	struct pidns_init_store *next; // next entry in the same hash bucket
	long int lastcheck; // time() of insertion or of the last valid lookup
};
81
/* lol - look at how they are allocated in the kernel */
#define PIDNS_HASH_SIZE 4096
#define HASH(x) ((x) % PIDNS_HASH_SIZE)

/* Buckets of singly linked pidns_init_store entries, indexed by HASH(ino). */
static struct pidns_init_store *pidns_hash_table[PIDNS_HASH_SIZE];
/* Serializes access to pidns_hash_table. */
static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Lock @l.  A locking failure is treated as fatal: the error is printed
 * to stderr and the process exits with status 1.
 */
static void lock_mutex(pthread_mutex_t *l)
{
	int ret;

	if ((ret = pthread_mutex_lock(l)) != 0) {
		fprintf(stderr, "pthread_mutex_lock returned:%d %s\n", ret, strerror(ret));
		exit(1);
	}
}
97
/*
 * Unlock @l.  An unlocking failure is treated as fatal: the error is
 * printed to stderr and the process exits with status 1.
 */
static void unlock_mutex(pthread_mutex_t *l)
{
	int ret;

	if ((ret = pthread_mutex_unlock(l)) != 0) {
		fprintf(stderr, "pthread_mutex_unlock returned:%d %s\n", ret, strerror(ret));
		exit(1);
	}
}
107
108static void store_lock(void)
109{
110 lock_mutex(&pidns_store_mutex);
111}
112
113static void store_unlock(void)
114{
115 unlock_mutex(&pidns_store_mutex);
116}
117
118/* Must be called under store_lock */
119static bool initpid_still_valid(struct pidns_init_store *e, struct stat *nsfdsb)
120{
121 struct stat initsb;
122 char fnam[100];
123
124 snprintf(fnam, 100, "/proc/%d", e->initpid);
125 if (stat(fnam, &initsb) < 0)
126 return false;
127#if DEBUG
128 fprintf(stderr, "comparing ctime %ld %ld for pid %d\n",
129 e->ctime, initsb.st_ctime, e->initpid);
130#endif
131 if (e->ctime != initsb.st_ctime)
132 return false;
133 return true;
134}
135
136/* Must be called under store_lock */
137static void remove_initpid(struct pidns_init_store *e)
138{
139 struct pidns_init_store *tmp;
140 int h;
141
142#if DEBUG
143 fprintf(stderr, "remove_initpid: removing entry for %d\n", e->initpid);
144#endif
145 h = HASH(e->ino);
146 if (pidns_hash_table[h] == e) {
147 pidns_hash_table[h] = e->next;
148 free(e);
149 return;
150 }
151
152 tmp = pidns_hash_table[h];
153 while (tmp) {
154 if (tmp->next == e) {
155 tmp->next = e->next;
156 free(e);
157 return;
158 }
159 tmp = tmp->next;
160 }
161}
162
163#define PURGE_SECS 5
164/* Must be called under store_lock */
165static void prune_initpid_store(void)
166{
167 static long int last_prune = 0;
168 struct pidns_init_store *e, *prev, *delme;
169 long int now, threshold;
170 int i;
171
172 if (!last_prune) {
173 last_prune = time(NULL);
174 return;
175 }
176 now = time(NULL);
177 if (now < last_prune + PURGE_SECS)
178 return;
179#if DEBUG
180 fprintf(stderr, "pruning\n");
181#endif
182 last_prune = now;
183 threshold = now - 2 * PURGE_SECS;
184
185 for (i = 0; i < PIDNS_HASH_SIZE; i++) {
186 for (prev = NULL, e = pidns_hash_table[i]; e; ) {
187 if (e->lastcheck < threshold) {
188#if DEBUG
189 fprintf(stderr, "Removing cached entry for %d\n", e->initpid);
190#endif
191 delme = e;
192 if (prev)
193 prev->next = e->next;
194 else
195 pidns_hash_table[i] = e->next;
196 e = e->next;
197 free(delme);
198 } else {
199 prev = e;
200 e = e->next;
201 }
202 }
203 }
204}
205
206/* Must be called under store_lock */
207static void save_initpid(struct stat *sb, pid_t pid)
208{
209 struct pidns_init_store *e;
210 char fpath[100];
211 struct stat procsb;
212 int h;
213
214#if DEBUG
215 fprintf(stderr, "save_initpid: adding entry for %d\n", pid);
216#endif
217 snprintf(fpath, 100, "/proc/%d", pid);
218 if (stat(fpath, &procsb) < 0)
219 return;
220 do {
221 e = malloc(sizeof(*e));
222 } while (!e);
223 e->ino = sb->st_ino;
224 e->initpid = pid;
225 e->ctime = procsb.st_ctime;
226 h = HASH(e->ino);
227 e->next = pidns_hash_table[h];
228 e->lastcheck = time(NULL);
229 pidns_hash_table[h] = e;
230}
231
232/*
233 * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
234 * entry for the inode number and creation time. Verify that the init pid
235 * is still valid. If not, remove it. Return the entry if valid, NULL
236 * otherwise.
237 * Must be called under store_lock
238 */
239static struct pidns_init_store *lookup_verify_initpid(struct stat *sb)
240{
241 int h = HASH(sb->st_ino);
242 struct pidns_init_store *e = pidns_hash_table[h];
243
244 while (e) {
245 if (e->ino == sb->st_ino) {
246 if (initpid_still_valid(e, sb)) {
247 e->lastcheck = time(NULL);
248 return e;
249 }
250 remove_initpid(e);
251 return NULL;
252 }
253 e = e->next;
254 }
255
256 return NULL;
257}
258
/* Return 1 if @path can be stat()ed and is a directory, 0 otherwise. */
static int is_dir(const char *path)
{
	struct stat statbuf;
	int ret = stat(path, &statbuf);
	if (ret == 0 && S_ISDIR(statbuf.st_mode))
		return 1;
	return 0;
}
267
/*
 * Return a heap copy of @str, retrying strdup() until it succeeds.
 * Returns NULL only when @str is NULL.  The caller frees the result.
 */
static char *must_copy_string(const char *str)
{
	char *dup = NULL;
	if (!str)
		return NULL;
	do {
		dup = strdup(str);
	} while (!dup);

	return dup;
}
279
/* Strip every trailing '\n' from @s in place. */
static inline void drop_trailing_newlines(char *s)
{
	size_t l;

	for (l = strlen(s); l > 0 && s[l-1] == '\n'; l--)
		s[l-1] = '\0';
}
287
#define BATCH_SIZE 50
/*
 * Make sure *@mem can hold @newlen bytes.  Memory is handed out in
 * multiples of BATCH_SIZE; a realloc() only happens when *@mem is NULL or
 * when @newlen needs more batches than @oldlen did.  The realloc() is
 * retried until it succeeds.
 */
static void dorealloc(char **mem, size_t oldlen, size_t newlen)
{
	size_t newbatches = (newlen / BATCH_SIZE) + 1;
	size_t oldbatches = (oldlen / BATCH_SIZE) + 1;

	if (!*mem || newbatches > oldbatches) {
		char *tmp;
		do {
			tmp = realloc(*mem, newbatches * BATCH_SIZE);
		} while (!tmp);
		*mem = tmp;
	}
}
/*
 * Append @line (@linelen bytes plus its terminating NUL) to the buffer
 * *@contents, which currently holds *@len bytes, growing it as needed.
 * *@len is updated to the new length, not counting the NUL.
 */
static void append_line(char **contents, size_t *len, char *line, ssize_t linelen)
{
	size_t newlen = *len + linelen;
	dorealloc(contents, *len, newlen + 1);
	memcpy(*contents + *len, line, linelen+1);
	*len = newlen;
}
309
/*
 * Read the whole of file @from into a newly allocated string with the
 * trailing newlines removed.  Returns NULL if the file cannot be opened
 * or yields no lines.  The caller frees the result.
 */
static char *slurp_file(const char *from)
{
	char *line = NULL;
	char *contents = NULL;
	FILE *f = fopen(from, "r");
	size_t len = 0, fulllen = 0;
	ssize_t linelen;

	if (!f)
		return NULL;

	while ((linelen = getline(&line, &len, f)) != -1) {
		append_line(&contents, &fulllen, line, linelen);
	}
	fclose(f);

	if (contents)
		drop_trailing_newlines(contents);
	free(line);
	return contents;
}
331
/*
 * Write @string to file @fnam, opened with mode "w".  Returns true only
 * if the file could be opened, the whole string was written and the
 * close succeeded; write and close failures are reported on stderr.
 */
static bool write_string(const char *fnam, const char *string)
{
	FILE *f;
	size_t len, ret;

	if (!(f = fopen(fnam, "w")))
		return false;
	len = strlen(string);
	ret = fwrite(string, 1, len, f);
	if (ret != len) {
		fprintf(stderr, "Error writing to file: %s\n", strerror(errno));
		fclose(f);
		return false;
	}
	/* fclose() flushes, so a failed write may only show up here */
	if (fclose(f) < 0) {
		fprintf(stderr, "Error writing to file: %s\n", strerror(errno));
		return false;
	}
	return true;
}
352
/*
 * hierarchies, i.e. 'cpu,cpuacct'
 */
char **hierarchies;
int num_hierarchies;

/* Name, ownership and mode of one cgroup file or directory. */
struct cgfs_files {
	char *name;
	uint32_t uid, gid;
	uint32_t mode;
};
364
0619767c 365#define ALLOC_NUM 20
237e200e
SH
366static bool store_hierarchy(char *stridx, char *h)
367{
0619767c
SH
368 if (num_hierarchies % ALLOC_NUM == 0) {
369 size_t n = (num_hierarchies / ALLOC_NUM) + 1;
370 n *= ALLOC_NUM;
371 char **tmp = realloc(hierarchies, n * sizeof(char *));
0619767c
SH
372 if (!tmp) {
373 fprintf(stderr, "Out of memory\n");
374 exit(1);
375 }
237e200e 376 hierarchies = tmp;
237e200e
SH
377 }
378
0619767c 379 hierarchies[num_hierarchies++] = must_copy_string(h);
237e200e
SH
380 return true;
381}
382
383static void print_subsystems(void)
384{
385 int i;
386
387 fprintf(stderr, "hierarchies:");
388 for (i = 0; i < num_hierarchies; i++) {
389 if (hierarchies[i])
390 fprintf(stderr, " %d: %s\n", i, hierarchies[i]);
391 }
392}
393
/*
 * Return true if @needle is exactly one of the elements of the
 * comma-separated list @haystack (e.g. "cpu" in "cpu,cpuacct").
 */
static bool in_comma_list(const char *needle, const char *haystack)
{
	const char *s = haystack, *e;
	size_t nlen = strlen(needle);

	/* compare against every element that is followed by a comma */
	while (*s && (e = strchr(s, ','))) {
		if (nlen == (size_t)(e - s) && strncmp(needle, s, nlen) == 0)
			return true;
		s = e + 1;
	}
	/* s now points at the last (or only) element */
	return strcmp(needle, s) == 0;
}
412
413/* do we need to do any massaging here? I'm not sure... */
414static char *find_mounted_controller(const char *controller)
415{
416 int i;
417
418 for (i = 0; i < num_hierarchies; i++) {
419 if (!hierarchies[i])
420 continue;
421 if (strcmp(hierarchies[i], controller) == 0)
422 return hierarchies[i];
423 if (in_comma_list(controller, hierarchies[i]))
424 return hierarchies[i];
425 }
426
427 return NULL;
428}
429
430bool cgfs_set_value(const char *controller, const char *cgroup, const char *file,
431 const char *value)
432{
433 size_t len;
434 char *fnam, *tmpc = find_mounted_controller(controller);
435
436 if (!tmpc)
437 return false;
438 /* basedir / tmpc / cgroup / file \0 */
439 len = strlen(basedir) + strlen(tmpc) + strlen(cgroup) + strlen(file) + 4;
440 fnam = alloca(len);
441 snprintf(fnam, len, "%s/%s/%s/%s", basedir, tmpc, cgroup, file);
442
443 return write_string(fnam, value);
444}
445
// Chown all the files in the cgroup directory.  We do this when we create
// a cgroup on behalf of a user.  This is best effort: failures are
// reported on stderr and the remaining entries are still processed.
static void chown_all_cgroup_files(const char *dirname, uid_t uid, gid_t gid)
{
	struct dirent *direntp;
	char path[MAXPATHLEN];
	size_t len;
	DIR *d;
	int ret;

	len = strlen(dirname);
	if (len >= MAXPATHLEN) {
		fprintf(stderr, "chown_all_cgroup_files: pathname too long: %s\n", dirname);
		return;
	}

	d = opendir(dirname);
	if (!d) {
		fprintf(stderr, "chown_all_cgroup_files: failed to open %s\n", dirname);
		return;
	}

	/* readdir_r() is deprecated; readdir() is fine on a private DIR */
	while ((direntp = readdir(d)) != NULL) {
		if (!strcmp(direntp->d_name, ".") || !strcmp(direntp->d_name, ".."))
			continue;
		ret = snprintf(path, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (ret < 0 || ret >= MAXPATHLEN) {
			fprintf(stderr, "chown_all_cgroup_files: pathname too long under %s\n", dirname);
			continue;
		}
		if (chown(path, uid, gid) < 0)
			fprintf(stderr, "Failed to chown file %s to %u:%u\n", path, uid, gid);
	}
	closedir(d);
}
481
482int cgfs_create(const char *controller, const char *cg, uid_t uid, gid_t gid)
483{
484 size_t len;
485 char *dirnam, *tmpc = find_mounted_controller(controller);
486
487 if (!tmpc)
488 return -EINVAL;
489 /* basedir / tmpc / cg \0 */
490 len = strlen(basedir) + strlen(tmpc) + strlen(cg) + 3;
491 dirnam = alloca(len);
492 snprintf(dirnam, len, "%s/%s/%s", basedir,tmpc, cg);
493
494 if (mkdir(dirnam, 0755) < 0)
495 return -errno;
496
497 if (uid == 0 && gid == 0)
498 return 0;
499
500 if (chown(dirnam, uid, gid) < 0)
501 return -errno;
502
503 chown_all_cgroup_files(dirnam, uid, gid);
504
505 return 0;
506}
507
/*
 * Remove directory @dirname after recursively removing the directories
 * below it.  Only directories are removed; other entries are left alone.
 * Failures on sub-entries are skipped.  Returns true only if both
 * closedir() and the final rmdir() of @dirname succeed.
 */
static bool recursive_rmdir(const char *dirname)
{
	struct dirent *direntp;
	DIR *dir;
	bool ret = false;
	char pathname[MAXPATHLEN];

	dir = opendir(dirname);
	if (!dir) {
#if DEBUG
		fprintf(stderr, "%s: failed to open %s: %s\n", __func__, dirname, strerror(errno));
#endif
		return false;
	}

	/* readdir_r() is deprecated; readdir() is fine on a private DIR */
	while ((direntp = readdir(dir)) != NULL) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			fprintf(stderr, "pathname too long\n");
			continue;
		}

		rc = lstat(pathname, &mystat);
		if (rc) {
#if DEBUG
			fprintf(stderr, "%s: failed to stat %s: %s\n", __func__, pathname, strerror(errno));
#endif
			continue;
		}
		if (S_ISDIR(mystat.st_mode)) {
			if (!recursive_rmdir(pathname)) {
#if DEBUG
				fprintf(stderr, "Error removing %s\n", pathname);
#endif
			}
		}
	}

	ret = true;
	if (closedir(dir) < 0) {
		fprintf(stderr, "%s: failed to close directory %s: %s\n", __func__, dirname, strerror(errno));
		ret = false;
	}

	if (rmdir(dirname) < 0) {
#if DEBUG
		fprintf(stderr, "%s: failed to delete %s: %s\n", __func__, dirname, strerror(errno));
#endif
		ret = false;
	}

	return ret;
}
571
572bool cgfs_remove(const char *controller, const char *cg)
573{
574 size_t len;
575 char *dirnam, *tmpc = find_mounted_controller(controller);
576
577 if (!tmpc)
578 return false;
579 /* basedir / tmpc / cg \0 */
580 len = strlen(basedir) + strlen(tmpc) + strlen(cg) + 3;
581 dirnam = alloca(len);
582 snprintf(dirnam, len, "%s/%s/%s", basedir,tmpc, cg);
583 return recursive_rmdir(dirnam);
584}
585
586bool cgfs_chmod_file(const char *controller, const char *file, mode_t mode)
587{
588 size_t len;
589 char *pathname, *tmpc = find_mounted_controller(controller);
590
591 if (!tmpc)
592 return false;
593 /* basedir / tmpc / file \0 */
594 len = strlen(basedir) + strlen(tmpc) + strlen(file) + 3;
595 pathname = alloca(len);
596 snprintf(pathname, len, "%s/%s/%s", basedir, tmpc, file);
597 if (chmod(pathname, mode) < 0)
598 return false;
599 return true;
600}
601
/*
 * chown() the "tasks" and "cgroup.procs" files in @dirname to
 * @uid:@gid.  Returns 0 on success or -errno from the first chown()
 * that fails.
 */
static int chown_tasks_files(const char *dirname, uid_t uid, gid_t gid)
{
	size_t len;
	char *fname;

	/* sized for the longer of the two names, so both fit */
	len = strlen(dirname) + strlen("/cgroup.procs") + 1;
	fname = alloca(len);
	snprintf(fname, len, "%s/tasks", dirname);
	if (chown(fname, uid, gid) != 0)
		return -errno;
	snprintf(fname, len, "%s/cgroup.procs", dirname);
	if (chown(fname, uid, gid) != 0)
		return -errno;
	return 0;
}
617
618int cgfs_chown_file(const char *controller, const char *file, uid_t uid, gid_t gid)
619{
620 size_t len;
621 char *pathname, *tmpc = find_mounted_controller(controller);
622
623 if (!tmpc)
624 return -EINVAL;
625 /* basedir / tmpc / file \0 */
626 len = strlen(basedir) + strlen(tmpc) + strlen(file) + 3;
627 pathname = alloca(len);
628 snprintf(pathname, len, "%s/%s/%s", basedir, tmpc, file);
629 if (chown(pathname, uid, gid) < 0)
630 return -errno;
631
632 if (is_dir(pathname))
633 // like cgmanager did, we want to chown the tasks file as well
634 return chown_tasks_files(pathname, uid, gid);
635
636 return 0;
637}
638
639FILE *open_pids_file(const char *controller, const char *cgroup)
640{
641 size_t len;
642 char *pathname, *tmpc = find_mounted_controller(controller);
643
644 if (!tmpc)
645 return NULL;
646 /* basedir / tmpc / cgroup / "cgroup.procs" \0 */
647 len = strlen(basedir) + strlen(tmpc) + strlen(cgroup) + 4 + strlen("cgroup.procs");
648 pathname = alloca(len);
649 snprintf(pathname, len, "%s/%s/%s/cgroup.procs", basedir, tmpc, cgroup);
650 return fopen(pathname, "w");
651}
652
f366da65
WB
653static bool cgfs_iterate_cgroup(const char *controller, const char *cgroup, bool directories,
654 void ***list, size_t typesize,
655 void* (*iterator)(const char*, const char*, const char*))
237e200e
SH
656{
657 size_t len;
658 char *dirname, *tmpc = find_mounted_controller(controller);
659 char pathname[MAXPATHLEN];
f366da65 660 size_t sz = 0, asz = 0;
237e200e
SH
661 struct dirent dirent, *direntp;
662 DIR *dir;
663 int ret;
664
f366da65 665 *list = NULL;
237e200e 666 if (!tmpc)
e97c834b 667 return false;
237e200e
SH
668
669 /* basedir / tmpc / cgroup \0 */
670 len = strlen(basedir) + strlen(tmpc) + strlen(cgroup) + 3;
671 dirname = alloca(len);
672 snprintf(dirname, len, "%s/%s/%s", basedir, tmpc, cgroup);
673
674 dir = opendir(dirname);
675 if (!dir)
676 return false;
677
678 while (!readdir_r(dir, &dirent, &direntp)) {
679 struct stat mystat;
680 int rc;
681
682 if (!direntp)
683 break;
684
685 if (!strcmp(direntp->d_name, ".") ||
686 !strcmp(direntp->d_name, ".."))
687 continue;
688
689 rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
690 if (rc < 0 || rc >= MAXPATHLEN) {
691 fprintf(stderr, "%s: pathname too long under %s\n", __func__, dirname);
692 continue;
693 }
694
695 ret = lstat(pathname, &mystat);
696 if (ret) {
697 fprintf(stderr, "%s: failed to stat %s: %s\n", __func__, pathname, strerror(errno));
698 continue;
699 }
f366da65
WB
700 if ((!directories && !S_ISREG(mystat.st_mode)) ||
701 (directories && !S_ISDIR(mystat.st_mode)))
237e200e
SH
702 continue;
703
704 if (sz+2 >= asz) {
f366da65 705 void **tmp;
237e200e
SH
706 asz += BATCH_SIZE;
707 do {
f366da65 708 tmp = realloc(*list, asz * typesize);
237e200e
SH
709 } while (!tmp);
710 *list = tmp;
711 }
f366da65 712 (*list)[sz] = (*iterator)(controller, cgroup, direntp->d_name);
237e200e
SH
713 (*list)[sz+1] = NULL;
714 sz++;
715 }
716 if (closedir(dir) < 0) {
717 fprintf(stderr, "%s: failed closedir for %s: %s\n", __func__, dirname, strerror(errno));
718 return false;
719 }
720 return true;
721}
722
/*
 * Iterator callback for cgfs_list_children(): return a heap copy of
 * @dir_entry, retrying strdup() until it succeeds.  @controller and
 * @cgroup are unused.
 */
static void *make_children_list_entry(const char *controller, const char *cgroup, const char *dir_entry)
{
	char *dup;
	do {
		dup = strdup(dir_entry);
	} while (!dup);
	return dup;
}
731
732bool cgfs_list_children(const char *controller, const char *cgroup, char ***list)
733{
734 return cgfs_iterate_cgroup(controller, cgroup, true, (void***)list, sizeof(*list), &make_children_list_entry);
735}
736
237e200e
SH
737void free_key(struct cgfs_files *k)
738{
739 if (!k)
740 return;
741 free(k->name);
742 free(k);
743}
744
/*
 * Free a NULL-terminated array of keys: every element up to the first
 * NULL, then the array itself.  A NULL @keys is ignored.
 */
void free_keys(struct cgfs_files **keys)
{
	int i;

	if (!keys)
		return;
	for (i = 0; keys[i]; i++) {
		free_key(keys[i]);
	}
	free(keys);
}
756
757bool cgfs_get_value(const char *controller, const char *cgroup, const char *file, char **value)
758{
759 size_t len;
760 char *fnam, *tmpc = find_mounted_controller(controller);
761
762 if (!tmpc)
763 return false;
764 /* basedir / tmpc / cgroup / file \0 */
765 len = strlen(basedir) + strlen(tmpc) + strlen(cgroup) + strlen(file) + 4;
766 fnam = alloca(len);
767 snprintf(fnam, len, "%s/%s/%s/%s", basedir, tmpc, cgroup, file);
768
769 *value = slurp_file(fnam);
770 return *value != NULL;
771}
772
773struct cgfs_files *cgfs_get_key(const char *controller, const char *cgroup, const char *file)
774{
775 size_t len;
776 char *fnam, *tmpc = find_mounted_controller(controller);
777 struct stat sb;
778 struct cgfs_files *newkey;
779 int ret;
780
781 if (!tmpc)
782 return false;
783
784 if (file && *file == '/')
785 file++;
786
787 if (file && index(file, '/'))
788 return NULL;
789
790 /* basedir / tmpc / cgroup / file \0 */
791 len = strlen(basedir) + strlen(tmpc) + strlen(cgroup) + 3;
792 if (file)
793 len += strlen(file) + 1;
794 fnam = alloca(len);
795 snprintf(fnam, len, "%s/%s/%s%s%s", basedir, tmpc, cgroup,
796 file ? "/" : "", file ? file : "");
797
798 ret = stat(fnam, &sb);
799 if (ret < 0)
800 return NULL;
801
802 do {
803 newkey = malloc(sizeof(struct cgfs_files));
804 } while (!newkey);
805 if (file)
806 newkey->name = must_copy_string(file);
807 else if (rindex(cgroup, '/'))
808 newkey->name = must_copy_string(rindex(cgroup, '/'));
809 else
810 newkey->name = must_copy_string(cgroup);
811 newkey->uid = sb.st_uid;
812 newkey->gid = sb.st_gid;
813 newkey->mode = sb.st_mode;
814
815 return newkey;
816}
817
/*
 * Iterator callback for cgfs_list_keys(): return the key for @dir_entry
 * under @controller:@cgroup.  A failed lookup is reported on stderr and
 * NULL is returned.
 */
static void *make_key_list_entry(const char *controller, const char *cgroup, const char *dir_entry)
{
	struct cgfs_files *entry = cgfs_get_key(controller, cgroup, dir_entry);
	if (!entry) {
		fprintf(stderr, "%s: Error getting files under %s:%s\n",
			__func__, controller, cgroup);
	}
	return entry;
}
827
828bool cgfs_list_keys(const char *controller, const char *cgroup, struct cgfs_files ***keys)
829{
830 return cgfs_iterate_cgroup(controller, cgroup, false, (void***)keys, sizeof(*keys), &make_key_list_entry);
237e200e
SH
831}
832
833bool is_child_cgroup(const char *controller, const char *cgroup, const char *f)
834{ size_t len;
835 char *fnam, *tmpc = find_mounted_controller(controller);
836 int ret;
837 struct stat sb;
838
839 if (!tmpc)
840 return false;
841 /* basedir / tmpc / cgroup / f \0 */
842 len = strlen(basedir) + strlen(tmpc) + strlen(cgroup) + strlen(f) + 4;
843 fnam = alloca(len);
844 snprintf(fnam, len, "%s/%s/%s/%s", basedir, tmpc, cgroup, f);
845
846 ret = stat(fnam, &sb);
847 if (ret < 0 || !S_ISDIR(sb.st_mode))
848 return false;
849 return true;
850}
851
/* Result codes of send_creds(). */
#define SEND_CREDS_OK 0
#define SEND_CREDS_NOTSK 1
#define SEND_CREDS_FAIL 2
/* Forward declarations of helpers used before their definitions. */
static bool recv_creds(int sock, struct ucred *cred, char *v);
static int wait_for_pid(pid_t pid);
static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst);
858
859/*
860 * fork a task which switches to @task's namespace and writes '1'.
861 * over a unix sock so we can read the task's reaper's pid in our
862 * namespace
863 */
864static void write_task_init_pid_exit(int sock, pid_t target)
865{
866 struct ucred cred;
867 char fnam[100];
868 pid_t pid;
869 char v;
870 int fd, ret;
871
872 ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", (int)target);
873 if (ret < 0 || ret >= sizeof(fnam))
874 _exit(1);
875
876 fd = open(fnam, O_RDONLY);
877 if (fd < 0) {
878 perror("write_task_init_pid_exit open of ns/pid");
879 _exit(1);
880 }
881 if (setns(fd, 0)) {
882 perror("write_task_init_pid_exit setns 1");
883 close(fd);
884 _exit(1);
885 }
886 pid = fork();
887 if (pid < 0)
888 _exit(1);
889 if (pid != 0) {
890 if (!wait_for_pid(pid))
891 _exit(1);
892 _exit(0);
893 }
894
895 /* we are the child */
896 cred.uid = 0;
897 cred.gid = 0;
898 cred.pid = 1;
899 v = '1';
900 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK)
901 _exit(1);
902 _exit(0);
903}
904
/*
 * Find the pid, as seen from our pid namespace, of the init process of
 * the pid namespace @task lives in.  A child is forked to run
 * write_task_init_pid_exit() and the answer is read from the credentials
 * it sends back over a socketpair.
 * Returns the pid, or -1 on failure.
 */
static pid_t get_init_pid_for_task(pid_t task)
{
	int sock[2];
	pid_t pid;
	pid_t ret = -1;
	char v = '0';
	struct ucred cred;

	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		perror("socketpair");
		return -1;
	}

	pid = fork();
	if (pid < 0)
		goto out;
	if (!pid) {
		close(sock[1]);
		write_task_init_pid_exit(sock[0], task);
		_exit(0);
	}

	if (!recv_creds(sock[1], &cred, &v))
		goto out;
	ret = cred.pid;

out:
	close(sock[0]);
	close(sock[1]);
	/* reap the helper even when no answer was received */
	if (pid > 0)
		wait_for_pid(pid);
	return ret;
}
938
939static pid_t lookup_initpid_in_store(pid_t qpid)
940{
941 pid_t answer = 0;
942 struct stat sb;
943 struct pidns_init_store *e;
944 char fnam[100];
945
946 snprintf(fnam, 100, "/proc/%d/ns/pid", qpid);
947 store_lock();
948 if (stat(fnam, &sb) < 0)
949 goto out;
950 e = lookup_verify_initpid(&sb);
951 if (e) {
952 answer = e->initpid;
953 goto out;
954 }
955 answer = get_init_pid_for_task(qpid);
956 if (answer > 0)
957 save_initpid(&sb, answer);
958
959out:
960 /* we prune at end in case we are returning
961 * the value we were about to return */
962 prune_initpid_store();
963 store_unlock();
964 return answer;
965}
966
/*
 * Wait for child @pid to terminate, restarting waitpid() when it is
 * interrupted by a signal.
 * Returns 0 if the child exited normally with status 0; -1 if @pid is
 * not positive, waitpid() fails, or the child exited abnormally or with
 * a non-zero status.
 */
static int wait_for_pid(pid_t pid)
{
	int status, ret;

	if (pid <= 0)
		return -1;

again:
	ret = waitpid(pid, &status, 0);
	if (ret == -1) {
		if (errno == EINTR)
			goto again;
		return -1;
	}
	if (ret != pid)
		goto again;
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		return -1;
	return 0;
}
987
988
989/*
990 * append pid to *src.
991 * src: a pointer to a char* in which ot append the pid.
992 * sz: the number of characters printed so far, minus trailing \0.
993 * asz: the allocated size so far
994 * pid: the pid to append
995 */
996static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
997{
998 char tmp[30];
999
1000 int tmplen = sprintf(tmp, "%d\n", (int)pid);
1001
1002 if (!*src || tmplen + *sz + 1 >= *asz) {
1003 char *tmp;
1004 do {
1005 tmp = realloc(*src, *asz + BUF_RESERVE_SIZE);
1006 } while (!tmp);
1007 *src = tmp;
1008 *asz += BUF_RESERVE_SIZE;
1009 }
bbfd0e33 1010 memcpy((*src) +*sz , tmp, tmplen+1); /* include the \0 */
237e200e 1011 *sz += tmplen;
237e200e
SH
1012}
1013
/*
 * Given an open FILE * to /proc/pid/{u,g}id_map, and an id
 * valid in the caller's namespace, return the id mapped into
 * pid's namespace.
 * The file is rewound first; lines that do not parse as three unsigned
 * numbers are skipped.
 * Returns the mapped id, or -1 (as unsigned int) if no range contains
 * in_id or a range wraps around.
 */
unsigned int
convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	unsigned int nsuid,   // base id for a range in the idfile's namespace
		     hostuid, // base id for a range in the caller's namespace
		     count;   // number of ids in this range
	char line[400];
	int ret;

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, sizeof(line), idfile)) {
		ret = sscanf(line, "%u %u %u\n", &nsuid, &hostuid, &count);
		if (ret != 3)
			continue;
		if (hostuid + count < hostuid || nsuid + count < nsuid) {
			/*
			 * uids wrapped around - unexpected as this is a procfile,
			 * so just bail.
			 */
			fprintf(stderr, "pid wrapparound at entry %u %u %u in %s\n",
				nsuid, hostuid, count, line);
			return -1;
		}
		if (hostuid <= in_id && hostuid+count > in_id) {
			/*
			 * now since hostuid <= in_id < hostuid+count, and
			 * hostuid+count and nsuid+count do not wrap around,
			 * we know that nsuid+(in_id-hostuid) which must be
			 * less than nsuid+(count) must not wrap around
			 */
			return (in_id - hostuid) + nsuid;
		}
	}

	// no answer found
	return -1;
}
1057
/*
 * for is_privileged_over,
 * specify whether we require the calling uid to be root in his
 * namespace
 */
#define NS_ROOT_REQD true
#define NS_ROOT_OPT false

/* Size of the buffers used to build /proc/<pid>/... paths. */
#define PROCLEN 100
1067
1068static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
1069{
1070 char fpath[PROCLEN];
1071 int ret;
1072 bool answer = false;
1073 uid_t nsuid;
1074
1075 if (victim == -1 || uid == -1)
1076 return false;
1077
1078 /*
1079 * If the request is one not requiring root in the namespace,
1080 * then having the same uid suffices. (i.e. uid 1000 has write
1081 * access to files owned by uid 1000
1082 */
1083 if (!req_ns_root && uid == victim)
1084 return true;
1085
1086 ret = snprintf(fpath, PROCLEN, "/proc/%d/uid_map", pid);
1087 if (ret < 0 || ret >= PROCLEN)
1088 return false;
1089 FILE *f = fopen(fpath, "r");
1090 if (!f)
1091 return false;
1092
1093 /* if caller's not root in his namespace, reject */
1094 nsuid = convert_id_to_ns(f, uid);
1095 if (nsuid)
1096 goto out;
1097
1098 /*
1099 * If victim is not mapped into caller's ns, reject.
1100 * XXX I'm not sure this check is needed given that fuse
1101 * will be sending requests where the vfs has converted
1102 */
1103 nsuid = convert_id_to_ns(f, victim);
1104 if (nsuid == -1)
1105 goto out;
1106
1107 answer = true;
1108
1109out:
1110 fclose(f);
1111 return answer;
1112}
1113
/*
 * Return true if the "other" permission bits (the lowest rwx triplet) of
 * @fmode grant the access mode requested by the open(2) flags in
 * @req_mode.  Callers shift @fmode right to test the owner or group
 * triplet instead.  An unrecognized access mode yields false.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t r;

	switch (req_mode & O_ACCMODE) {
	case O_RDONLY:
		r = S_IROTH;
		break;
	case O_WRONLY:
		r = S_IWOTH;
		break;
	case O_RDWR:
		r = S_IROTH | S_IWOTH;
		break;
	default:
		return false;
	}
	return ((fmode & r) == r);
}
1133
1134
/*
 * taskcg is /a/b/c/d/e
 * querycg is /a/b/c
 * we return 'd'
 *
 * i.e. return the path component of @taskcg that follows @querycg, as a
 * newly allocated string the caller must free.  @querycg is assumed to
 * be a prefix of @taskcg; this is not verified beyond requiring @taskcg
 * to be the longer string.  Returns NULL on bad input or allocation
 * failure.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	char *start, *end;

	if (strlen(taskcg) <= strlen(querycg)) {
		fprintf(stderr, "%s: I was fed bad input\n", __func__);
		return NULL;
	}

	/* skip the query prefix and the '/' that follows it */
	if (strcmp(querycg, "/") == 0)
		start = strdup(taskcg + 1);
	else
		start = strdup(taskcg + strlen(querycg) + 1);
	if (!start)
		return NULL;
	end = strchr(start, '/');
	if (end)
		*end = '\0';
	return start;
}
1160
/* Remove a single trailing '\n' from @x in place, if there is one. */
static void stripnewline(char *x)
{
	size_t l = strlen(x);
	if (l && x[l-1] == '\n')
		x[l-1] = '\0';
}
1167
1168static char *get_pid_cgroup(pid_t pid, const char *contrl)
1169{
1170 char fnam[PROCLEN];
1171 FILE *f;
1172 char *answer = NULL;
1173 char *line = NULL;
1174 size_t len = 0;
1175 int ret;
1176 const char *h = find_mounted_controller(contrl);
1177 if (!h)
1178 return NULL;
1179
1180 ret = snprintf(fnam, PROCLEN, "/proc/%d/cgroup", pid);
1181 if (ret < 0 || ret >= PROCLEN)
1182 return NULL;
1183 if (!(f = fopen(fnam, "r")))
1184 return NULL;
1185
1186 while (getline(&line, &len, f) != -1) {
1187 char *c1, *c2;
1188 if (!line[0])
1189 continue;
1190 c1 = strchr(line, ':');
1191 if (!c1)
1192 goto out;
1193 c1++;
1194 c2 = strchr(c1, ':');
1195 if (!c2)
1196 goto out;
1197 *c2 = '\0';
1198 if (strcmp(c1, h) != 0)
1199 continue;
1200 c2++;
1201 stripnewline(c2);
1202 do {
1203 answer = strdup(c2);
1204 } while (!answer);
1205 break;
1206 }
1207
1208out:
1209 fclose(f);
1210 free(line);
1211 return answer;
1212}
1213
1214/*
1215 * check whether a fuse context may access a cgroup dir or file
1216 *
1217 * If file is not null, it is a cgroup file to check under cg.
1218 * If file is null, then we are checking perms on cg itself.
1219 *
1220 * For files we can check the mode of the list_keys result.
1221 * For cgroups, we must make assumptions based on the files under the
1222 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
1223 * yet.
1224 */
1225static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode)
1226{
1227 struct cgfs_files *k = NULL;
1228 bool ret = false;
1229
1230 k = cgfs_get_key(contrl, cg, file);
1231 if (!k)
1232 return false;
1233
1234 if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
1235 if (perms_include(k->mode >> 6, mode)) {
1236 ret = true;
1237 goto out;
1238 }
1239 }
1240 if (fc->gid == k->gid) {
1241 if (perms_include(k->mode >> 3, mode)) {
1242 ret = true;
1243 goto out;
1244 }
1245 }
1246 ret = perms_include(k->mode, mode);
1247
1248out:
1249 free_key(k);
1250 return ret;
1251}
1252
#define INITSCOPE "/init.scope"
/*
 * If cgroup path @cg ends in "/init.scope", cut that suffix off in
 * place.  A path that consists of nothing but "/init.scope" becomes "/".
 */
static void prune_init_slice(char *cg)
{
	char *point;
	size_t cg_len = strlen(cg), initscope_len = strlen(INITSCOPE);

	if (cg_len < initscope_len)
		return;

	point = cg + cg_len - initscope_len;
	if (strcmp(point, INITSCOPE) == 0) {
		if (point == cg)
			*(point+1) = '\0';
		else
			*point = '\0';
	}
}
1270
/*
 * If pid is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
 * If pid is in /a, he may act on /a/b, but not on /b.
 * if the answer is false and nextcg is not NULL, then *nextcg will point
 * to a string containing the next cgroup directory under cg, which must be
 * freed by the caller (it may be NULL if there is no such directory).
 *
 * The task's cgroup must match cg on a path-component boundary: a task
 * in /a/b is in an ancestor of /a/b and /a/b/c, but not of /a/bc.
 * Returns false (leaving *nextcg untouched) if the task's cgroup cannot
 * be determined.
 */
static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
{
	bool answer = false;
	char *c2 = get_pid_cgroup(pid, contrl);
	char *linecmp;
	size_t len;

	if (!c2)
		return false;
	prune_init_slice(c2);

	/*
	 * callers pass in '/' for root cgroup, otherwise they pass
	 * in a cgroup without leading '/'
	 */
	linecmp = *cg == '/' ? c2 : c2+1;
	len = strlen(linecmp);
	if (strncmp(linecmp, cg, len) == 0 &&
	    (len == 0 || linecmp[len - 1] == '/' ||
	     cg[len] == '\0' || cg[len] == '/')) {
		/* task's cgroup is cg itself or one of its ancestors */
		answer = true;
		goto out;
	}

	if (nextcg)
		*nextcg = get_next_cgroup_dir(linecmp, cg);

out:
	free(c2);
	return answer;
}
1305
1306/*
1307 * If pid is in /a/b/c, he may see that /a exists, but not /b or /a/c.
1308 */
1309static bool caller_may_see_dir(pid_t pid, const char *contrl, const char *cg)
1310{
1311 bool answer = false;
1312 char *c2, *task_cg;
1313 size_t target_len, task_len;
1314
1315 if (strcmp(cg, "/") == 0)
1316 return true;
1317
1318 c2 = get_pid_cgroup(pid, contrl);
1319 if (!c2)
1320 return false;
1321 prune_init_slice(c2);
1322
1323 task_cg = c2 + 1;
1324 target_len = strlen(cg);
1325 task_len = strlen(task_cg);
1326 if (task_len == 0) {
1327 /* Task is in the root cg, it can see everything. This case is
1328 * not handled by the strmcps below, since they test for the
1329 * last /, but that is the first / that we've chopped off
1330 * above.
1331 */
1332 answer = true;
1333 goto out;
1334 }
1335 if (strcmp(cg, task_cg) == 0) {
1336 answer = true;
1337 goto out;
1338 }
1339 if (target_len < task_len) {
1340 /* looking up a parent dir */
1341 if (strncmp(task_cg, cg, target_len) == 0 && task_cg[target_len] == '/')
1342 answer = true;
1343 goto out;
1344 }
1345 if (target_len > task_len) {
1346 /* looking up a child dir */
1347 if (strncmp(task_cg, cg, task_len) == 0 && cg[task_len] == '/')
1348 answer = true;
1349 goto out;
1350 }
1351
1352out:
1353 free(c2);
1354 return answer;
1355}
1356
1357/*
1358 * given /cgroup/freezer/a/b, return "freezer".
1359 * the returned char* should NOT be freed.
1360 */
1361static char *pick_controller_from_path(struct fuse_context *fc, const char *path)
1362{
1363 const char *p1;
1364 char *contr, *slash;
1365
1366 if (strlen(path) < 9)
1367 return NULL;
1368 if (*(path+7) != '/')
1369 return NULL;
1370 p1 = path+8;
1371 contr = strdupa(p1);
1372 if (!contr)
1373 return NULL;
1374 slash = strstr(contr, "/");
1375 if (slash)
1376 *slash = '\0';
1377
1378 int i;
1379 for (i = 0; i < num_hierarchies; i++) {
1380 if (hierarchies[i] && strcmp(hierarchies[i], contr) == 0)
1381 return hierarchies[i];
1382 }
1383 return NULL;
1384}
1385
1386/*
1387 * Find the start of cgroup in /cgroup/controller/the/cgroup/path
1388 * Note that the returned value may include files (keynames) etc
1389 */
1390static const char *find_cgroup_in_path(const char *path)
1391{
1392 const char *p1;
1393
1394 if (strlen(path) < 9)
1395 return NULL;
1396 p1 = strstr(path+8, "/");
1397 if (!p1)
1398 return NULL;
1399 return p1+1;
1400}
1401
1402/*
1403 * split the last path element from the path in @cg.
1404 * @dir is newly allocated and should be freed, @last not
1405*/
1406static void get_cgdir_and_path(const char *cg, char **dir, char **last)
1407{
1408 char *p;
1409
1410 do {
1411 *dir = strdup(cg);
1412 } while (!*dir);
1413 *last = strrchr(cg, '/');
1414 if (!*last) {
1415 *last = NULL;
1416 return;
1417 }
1418 p = strrchr(*dir, '/');
1419 *p = '\0';
1420}
1421
1422/*
1423 * FUSE ops for /cgroup
1424 */
1425
1426int cg_getattr(const char *path, struct stat *sb)
1427{
1428 struct timespec now;
1429 struct fuse_context *fc = fuse_get_context();
1430 char * cgdir = NULL;
1431 char *last = NULL, *path1, *path2;
1432 struct cgfs_files *k = NULL;
1433 const char *cgroup;
1434 const char *controller = NULL;
1435 int ret = -ENOENT;
1436
1437
1438 if (!fc)
1439 return -EIO;
1440
1441 memset(sb, 0, sizeof(struct stat));
1442
1443 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
1444 return -EINVAL;
1445
1446 sb->st_uid = sb->st_gid = 0;
1447 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
1448 sb->st_size = 0;
1449
1450 if (strcmp(path, "/cgroup") == 0) {
1451 sb->st_mode = S_IFDIR | 00755;
1452 sb->st_nlink = 2;
1453 return 0;
1454 }
1455
1456 controller = pick_controller_from_path(fc, path);
1457 if (!controller)
1458 return -EIO;
1459 cgroup = find_cgroup_in_path(path);
1460 if (!cgroup) {
1461 /* this is just /cgroup/controller, return it as a dir */
1462 sb->st_mode = S_IFDIR | 00755;
1463 sb->st_nlink = 2;
1464 return 0;
1465 }
1466
1467 get_cgdir_and_path(cgroup, &cgdir, &last);
1468
1469 if (!last) {
1470 path1 = "/";
1471 path2 = cgdir;
1472 } else {
1473 path1 = cgdir;
1474 path2 = last;
1475 }
1476
1477 pid_t initpid = lookup_initpid_in_store(fc->pid);
1478 if (initpid <= 0)
1479 initpid = fc->pid;
1480 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
1481 * Then check that caller's cgroup is under path if last is a child
1482 * cgroup, or cgdir if last is a file */
1483
1484 if (is_child_cgroup(controller, path1, path2)) {
1485 if (!caller_may_see_dir(initpid, controller, cgroup)) {
1486 ret = -ENOENT;
1487 goto out;
1488 }
1489 if (!caller_is_in_ancestor(initpid, controller, cgroup, NULL)) {
1490 /* this is just /cgroup/controller, return it as a dir */
1491 sb->st_mode = S_IFDIR | 00555;
1492 sb->st_nlink = 2;
1493 ret = 0;
1494 goto out;
1495 }
1496 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) {
1497 ret = -EACCES;
1498 goto out;
1499 }
1500
1501 // get uid, gid, from '/tasks' file and make up a mode
1502 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1503 sb->st_mode = S_IFDIR | 00755;
1504 k = cgfs_get_key(controller, cgroup, NULL);
1505 if (!k) {
1506 sb->st_uid = sb->st_gid = 0;
1507 } else {
1508 sb->st_uid = k->uid;
1509 sb->st_gid = k->gid;
1510 }
1511 free_key(k);
1512 sb->st_nlink = 2;
1513 ret = 0;
1514 goto out;
1515 }
1516
1517 if ((k = cgfs_get_key(controller, path1, path2)) != NULL) {
1518 sb->st_mode = S_IFREG | k->mode;
1519 sb->st_nlink = 1;
1520 sb->st_uid = k->uid;
1521 sb->st_gid = k->gid;
1522 sb->st_size = 0;
1523 free_key(k);
1524 if (!caller_is_in_ancestor(initpid, controller, path1, NULL)) {
1525 ret = -ENOENT;
1526 goto out;
1527 }
1528 if (!fc_may_access(fc, controller, path1, path2, O_RDONLY)) {
1529 ret = -EACCES;
1530 goto out;
1531 }
1532
1533 ret = 0;
1534 }
1535
1536out:
1537 free(cgdir);
1538 return ret;
1539}
1540
1541int cg_opendir(const char *path, struct fuse_file_info *fi)
1542{
1543 struct fuse_context *fc = fuse_get_context();
1544 const char *cgroup;
1545 struct file_info *dir_info;
1546 char *controller = NULL;
1547
1548 if (!fc)
1549 return -EIO;
1550
1551 if (strcmp(path, "/cgroup") == 0) {
1552 cgroup = NULL;
1553 controller = NULL;
1554 } else {
1555 // return list of keys for the controller, and list of child cgroups
1556 controller = pick_controller_from_path(fc, path);
1557 if (!controller)
1558 return -EIO;
1559
1560 cgroup = find_cgroup_in_path(path);
1561 if (!cgroup) {
1562 /* this is just /cgroup/controller, return its contents */
1563 cgroup = "/";
1564 }
1565 }
1566
1567 pid_t initpid = lookup_initpid_in_store(fc->pid);
1568 if (initpid <= 0)
1569 initpid = fc->pid;
1570 if (cgroup) {
1571 if (!caller_may_see_dir(initpid, controller, cgroup))
1572 return -ENOENT;
1573 if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY))
1574 return -EACCES;
1575 }
1576
1577 /* we'll free this at cg_releasedir */
1578 dir_info = malloc(sizeof(*dir_info));
1579 if (!dir_info)
1580 return -ENOMEM;
1581 dir_info->controller = must_copy_string(controller);
1582 dir_info->cgroup = must_copy_string(cgroup);
1583 dir_info->type = LXC_TYPE_CGDIR;
1584 dir_info->buf = NULL;
1585 dir_info->file = NULL;
1586 dir_info->buflen = 0;
1587
1588 fi->fh = (unsigned long)dir_info;
1589 return 0;
1590}
1591
1592int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
1593 struct fuse_file_info *fi)
1594{
1595 struct file_info *d = (struct file_info *)fi->fh;
1596 struct cgfs_files **list = NULL;
1597 int i, ret;
1598 char *nextcg = NULL;
1599 struct fuse_context *fc = fuse_get_context();
1600 char **clist = NULL;
1601
1602 if (d->type != LXC_TYPE_CGDIR) {
1603 fprintf(stderr, "Internal error: file cache info used in readdir\n");
1604 return -EIO;
1605 }
1606 if (!d->cgroup && !d->controller) {
1607 // ls /var/lib/lxcfs/cgroup - just show list of controllers
1608 int i;
1609
1610 for (i = 0; i < num_hierarchies; i++) {
1611 if (hierarchies[i] && filler(buf, hierarchies[i], NULL, 0) != 0) {
1612 return -EIO;
1613 }
1614 }
1615 return 0;
1616 }
1617
1618 if (!cgfs_list_keys(d->controller, d->cgroup, &list)) {
1619 // not a valid cgroup
1620 ret = -EINVAL;
1621 goto out;
1622 }
1623
1624 pid_t initpid = lookup_initpid_in_store(fc->pid);
1625 if (initpid <= 0)
1626 initpid = fc->pid;
1627 if (!caller_is_in_ancestor(initpid, d->controller, d->cgroup, &nextcg)) {
1628 if (nextcg) {
1629 ret = filler(buf, nextcg, NULL, 0);
1630 free(nextcg);
1631 if (ret != 0) {
1632 ret = -EIO;
1633 goto out;
1634 }
1635 }
1636 ret = 0;
1637 goto out;
1638 }
1639
1640 for (i = 0; list[i]; i++) {
1641 if (filler(buf, list[i]->name, NULL, 0) != 0) {
1642 ret = -EIO;
1643 goto out;
1644 }
1645 }
1646
1647 // now get the list of child cgroups
1648
1649 if (!cgfs_list_children(d->controller, d->cgroup, &clist)) {
1650 ret = 0;
1651 goto out;
1652 }
f366da65
WB
1653 if (clist) {
1654 for (i = 0; clist[i]; i++) {
1655 if (filler(buf, clist[i], NULL, 0) != 0) {
1656 ret = -EIO;
1657 goto out;
1658 }
237e200e
SH
1659 }
1660 }
1661 ret = 0;
1662
1663out:
1664 free_keys(list);
1665 if (clist) {
1666 for (i = 0; clist[i]; i++)
1667 free(clist[i]);
1668 free(clist);
1669 }
1670 return ret;
1671}
1672
1673static void do_release_file_info(struct file_info *f)
1674{
1675 if (!f)
1676 return;
1677 free(f->controller);
1678 free(f->cgroup);
1679 free(f->file);
1680 free(f->buf);
1681 free(f);
1682}
1683
1684int cg_releasedir(const char *path, struct fuse_file_info *fi)
1685{
1686 struct file_info *d = (struct file_info *)fi->fh;
1687
1688 do_release_file_info(d);
1689 return 0;
1690}
1691
1692int cg_open(const char *path, struct fuse_file_info *fi)
1693{
1694 const char *cgroup;
1695 char *last = NULL, *path1, *path2, * cgdir = NULL, *controller;
1696 struct cgfs_files *k = NULL;
1697 struct file_info *file_info;
1698 struct fuse_context *fc = fuse_get_context();
1699 int ret;
1700
1701 if (!fc)
1702 return -EIO;
1703
1704 controller = pick_controller_from_path(fc, path);
1705 if (!controller)
1706 return -EIO;
1707 cgroup = find_cgroup_in_path(path);
1708 if (!cgroup)
1709 return -EINVAL;
1710
1711 get_cgdir_and_path(cgroup, &cgdir, &last);
1712 if (!last) {
1713 path1 = "/";
1714 path2 = cgdir;
1715 } else {
1716 path1 = cgdir;
1717 path2 = last;
1718 }
1719
1720 k = cgfs_get_key(controller, path1, path2);
1721 if (!k) {
1722 ret = -EINVAL;
1723 goto out;
1724 }
1725 free_key(k);
1726
1727 pid_t initpid = lookup_initpid_in_store(fc->pid);
1728 if (initpid <= 0)
1729 initpid = fc->pid;
1730 if (!caller_may_see_dir(initpid, controller, path1)) {
1731 ret = -ENOENT;
1732 goto out;
1733 }
1734 if (!fc_may_access(fc, controller, path1, path2, fi->flags)) {
1735 // should never get here
1736 ret = -EACCES;
1737 goto out;
1738 }
1739
1740 /* we'll free this at cg_release */
1741 file_info = malloc(sizeof(*file_info));
1742 if (!file_info) {
1743 ret = -ENOMEM;
1744 goto out;
1745 }
1746 file_info->controller = must_copy_string(controller);
1747 file_info->cgroup = must_copy_string(path1);
1748 file_info->file = must_copy_string(path2);
1749 file_info->type = LXC_TYPE_CGFILE;
1750 file_info->buf = NULL;
1751 file_info->buflen = 0;
1752
1753 fi->fh = (unsigned long)file_info;
1754 ret = 0;
1755
1756out:
1757 free(cgdir);
1758 return ret;
1759}
1760
1761int cg_release(const char *path, struct fuse_file_info *fi)
1762{
1763 struct file_info *f = (struct file_info *)fi->fh;
1764
1765 do_release_file_info(f);
1766 return 0;
1767}
1768
#define POLLIN_SET ( EPOLLIN | EPOLLHUP | EPOLLRDHUP )

/*
 * Wait until @sock becomes readable (or is hung up), for at most about
 * @timeout seconds measured with time().
 *
 * Returns true when epoll reports an event on @sock.  Returns false on
 * timeout or on any error.  When the deadline is found to have already
 * passed, errno is set to 0; when epoll_wait() returns without an event,
 * errno is left as it was right after that call.  Waits interrupted by a
 * signal (EINTR) are restarted with the remaining time.
 */
static bool wait_for_sock(int sock, int timeout)
{
	struct epoll_event ev;
	int epfd, nready, now, deadline, remaining, saved_errno;

	now = time(NULL);
	if (now < 0)
		return false;
	deadline = now + timeout;

	epfd = epoll_create(1);
	if (epfd < 0) {
		fprintf(stderr, "Failed to create epoll socket: %m\n");
		return false;
	}

	ev.events = POLLIN_SET;
	ev.data.fd = sock;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, sock, &ev) < 0) {
		fprintf(stderr, "Failed adding socket to epoll: %m\n");
		close(epfd);
		return false;
	}

	do {
		now = time(NULL);
		if (now < 0) {
			close(epfd);
			return false;
		}

		remaining = deadline - now;
		if (remaining < 0) { // timeout
			errno = 0;
			close(epfd);
			return false;
		}

		/* the extra millisecond keeps a zero-second budget from polling */
		nready = epoll_wait(epfd, &ev, 1, 1000*remaining + 1);
	} while (nready < 0 && errno == EINTR);

	/* close() may touch errno; hand the epoll_wait() value back to the caller */
	saved_errno = errno;
	close(epfd);

	if (nready <= 0) {
		errno = saved_errno;
		return false;
	}
	return true;
}
1816
/*
 * Receive up to @len bytes from @sockfd into @buf, waiting at most two
 * seconds for data to arrive.  Returns the recv() result, or -1 when
 * wait_for_sock() reports that nothing became readable.
 */
static int msgrecv(int sockfd, void *buf, size_t len)
{
	if (wait_for_sock(sockfd, 2))
		return recv(sockfd, buf, len, MSG_DONTWAIT);
	return -1;
}
1823
1824static int send_creds(int sock, struct ucred *cred, char v, bool pingfirst)
1825{
1826 struct msghdr msg = { 0 };
1827 struct iovec iov;
1828 struct cmsghdr *cmsg;
1829 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
1830 char buf[1];
1831 buf[0] = 'p';
1832
1833 if (pingfirst) {
1834 if (msgrecv(sock, buf, 1) != 1) {
1835 fprintf(stderr, "%s: Error getting reply from server over socketpair\n",
1836 __func__);
1837 return SEND_CREDS_FAIL;
1838 }
1839 }
1840
1841 msg.msg_control = cmsgbuf;
1842 msg.msg_controllen = sizeof(cmsgbuf);
1843
1844 cmsg = CMSG_FIRSTHDR(&msg);
1845 cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
1846 cmsg->cmsg_level = SOL_SOCKET;
1847 cmsg->cmsg_type = SCM_CREDENTIALS;
1848 memcpy(CMSG_DATA(cmsg), cred, sizeof(*cred));
1849
1850 msg.msg_name = NULL;
1851 msg.msg_namelen = 0;
1852
1853 buf[0] = v;
1854 iov.iov_base = buf;
1855 iov.iov_len = sizeof(buf);
1856 msg.msg_iov = &iov;
1857 msg.msg_iovlen = 1;
1858
1859 if (sendmsg(sock, &msg, 0) < 0) {
1860 fprintf(stderr, "%s: failed at sendmsg: %s\n", __func__,
1861 strerror(errno));
1862 if (errno == 3)
1863 return SEND_CREDS_NOTSK;
1864 return SEND_CREDS_FAIL;
1865 }
1866
1867 return SEND_CREDS_OK;
1868}
1869
1870static bool recv_creds(int sock, struct ucred *cred, char *v)
1871{
1872 struct msghdr msg = { 0 };
1873 struct iovec iov;
1874 struct cmsghdr *cmsg;
1875 char cmsgbuf[CMSG_SPACE(sizeof(*cred))];
1876 char buf[1];
1877 int ret;
1878 int optval = 1;
1879
1880 *v = '1';
1881
1882 cred->pid = -1;
1883 cred->uid = -1;
1884 cred->gid = -1;
1885
1886 if (setsockopt(sock, SOL_SOCKET, SO_PASSCRED, &optval, sizeof(optval)) == -1) {
1887 fprintf(stderr, "Failed to set passcred: %s\n", strerror(errno));
1888 return false;
1889 }
1890 buf[0] = '1';
1891 if (write(sock, buf, 1) != 1) {
1892 fprintf(stderr, "Failed to start write on scm fd: %s\n", strerror(errno));
1893 return false;
1894 }
1895
1896 msg.msg_name = NULL;
1897 msg.msg_namelen = 0;
1898 msg.msg_control = cmsgbuf;
1899 msg.msg_controllen = sizeof(cmsgbuf);
1900
1901 iov.iov_base = buf;
1902 iov.iov_len = sizeof(buf);
1903 msg.msg_iov = &iov;
1904 msg.msg_iovlen = 1;
1905
1906 if (!wait_for_sock(sock, 2)) {
1907 fprintf(stderr, "Timed out waiting for scm_cred: %s\n",
1908 strerror(errno));
1909 return false;
1910 }
1911 ret = recvmsg(sock, &msg, MSG_DONTWAIT);
1912 if (ret < 0) {
1913 fprintf(stderr, "Failed to receive scm_cred: %s\n",
1914 strerror(errno));
1915 return false;
1916 }
1917
1918 cmsg = CMSG_FIRSTHDR(&msg);
1919
1920 if (cmsg && cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)) &&
1921 cmsg->cmsg_level == SOL_SOCKET &&
1922 cmsg->cmsg_type == SCM_CREDENTIALS) {
1923 memcpy(cred, CMSG_DATA(cmsg), sizeof(*cred));
1924 }
1925 *v = buf[0];
1926
1927 return true;
1928}
1929
1930
1931/*
1932 * pid_to_ns - reads pids from a ucred over a socket, then writes the
1933 * int value back over the socket. This shifts the pid from the
1934 * sender's pidns into tpid's pidns.
1935 */
1936static void pid_to_ns(int sock, pid_t tpid)
1937{
1938 char v = '0';
1939 struct ucred cred;
1940
1941 while (recv_creds(sock, &cred, &v)) {
1942 if (v == '1')
1943 _exit(0);
1944 if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
1945 _exit(1);
1946 }
1947 _exit(0);
1948}
1949
1950/*
1951 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
1952 * in your old pidns. Only children which you fork will be in the target
1953 * pidns. So the pid_to_ns_wrapper does the setns, then forks a child to
1954 * actually convert pids
1955 */
1956static void pid_to_ns_wrapper(int sock, pid_t tpid)
1957{
1958 int newnsfd = -1, ret, cpipe[2];
1959 char fnam[100];
1960 pid_t cpid;
1961 char v;
1962
1963 ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid);
1964 if (ret < 0 || ret >= sizeof(fnam))
1965 _exit(1);
1966 newnsfd = open(fnam, O_RDONLY);
1967 if (newnsfd < 0)
1968 _exit(1);
1969 if (setns(newnsfd, 0) < 0)
1970 _exit(1);
1971 close(newnsfd);
1972
1973 if (pipe(cpipe) < 0)
1974 _exit(1);
1975
1976 cpid = fork();
1977 if (cpid < 0)
1978 _exit(1);
1979
1980 if (!cpid) {
1981 char b = '1';
1982 close(cpipe[0]);
1983 if (write(cpipe[1], &b, sizeof(char)) < 0) {
1984 fprintf(stderr, "%s (child): erorr on write: %s\n",
1985 __func__, strerror(errno));
1986 }
1987 close(cpipe[1]);
1988 pid_to_ns(sock, tpid);
1989 _exit(1); // not reached
1990 }
1991 // give the child 1 second to be done forking and
1992 // write its ack
1993 if (!wait_for_sock(cpipe[0], 1))
1994 _exit(1);
1995 ret = read(cpipe[0], &v, 1);
1996 if (ret != sizeof(char) || v != '1')
1997 _exit(1);
1998
1999 if (!wait_for_pid(cpid))
2000 _exit(1);
2001 _exit(0);
2002}
2003
2004/*
2005 * To read cgroup files with a particular pid, we will setns into the child
2006 * pidns, open a pipe, fork a child - which will be the first to really be in
2007 * the child ns - which does the cgfs_get_value and writes the data to the pipe.
2008 */
2009bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, const char *file, char **d)
2010{
2011 int sock[2] = {-1, -1};
2012 char *tmpdata = NULL;
2013 int ret;
2014 pid_t qpid, cpid = -1;
2015 bool answer = false;
2016 char v = '0';
2017 struct ucred cred;
2018 size_t sz = 0, asz = 0;
2019
2020 if (!cgfs_get_value(contrl, cg, file, &tmpdata))
2021 return false;
2022
2023 /*
2024 * Now we read the pids from returned data one by one, pass
2025 * them into a child in the target namespace, read back the
2026 * translated pids, and put them into our to-return data
2027 */
2028
2029 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
2030 perror("socketpair");
2031 free(tmpdata);
2032 return false;
2033 }
2034
2035 cpid = fork();
2036 if (cpid == -1)
2037 goto out;
2038
2039 if (!cpid) // child - exits when done
2040 pid_to_ns_wrapper(sock[1], tpid);
2041
2042 char *ptr = tmpdata;
2043 cred.uid = 0;
2044 cred.gid = 0;
2045 while (sscanf(ptr, "%d\n", &qpid) == 1) {
2046 cred.pid = qpid;
2047 ret = send_creds(sock[0], &cred, v, true);
2048
2049 if (ret == SEND_CREDS_NOTSK)
2050 goto next;
2051 if (ret == SEND_CREDS_FAIL)
2052 goto out;
2053
2054 // read converted results
2055 if (!wait_for_sock(sock[0], 2)) {
2056 fprintf(stderr, "%s: timed out waiting for pid from child: %s\n",
2057 __func__, strerror(errno));
2058 goto out;
2059 }
2060 if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
2061 fprintf(stderr, "%s: error reading pid from child: %s\n",
2062 __func__, strerror(errno));
2063 goto out;
2064 }
2065 must_strcat_pid(d, &sz, &asz, qpid);
2066next:
2067 ptr = strchr(ptr, '\n');
2068 if (!ptr)
2069 break;
2070 ptr++;
2071 }
2072
2073 cred.pid = getpid();
2074 v = '1';
2075 if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) {
2076 // failed to ask child to exit
2077 fprintf(stderr, "%s: failed to ask child to exit: %s\n",
2078 __func__, strerror(errno));
2079 goto out;
2080 }
2081
2082 answer = true;
2083
2084out:
2085 free(tmpdata);
2086 if (cpid != -1)
2087 wait_for_pid(cpid);
2088 if (sock[0] != -1) {
2089 close(sock[0]);
2090 close(sock[1]);
2091 }
2092 return answer;
2093}
2094
2095int cg_read(const char *path, char *buf, size_t size, off_t offset,
2096 struct fuse_file_info *fi)
2097{
2098 struct fuse_context *fc = fuse_get_context();
2099 struct file_info *f = (struct file_info *)fi->fh;
2100 struct cgfs_files *k = NULL;
2101 char *data = NULL;
2102 int ret, s;
2103 bool r;
2104
2105 if (f->type != LXC_TYPE_CGFILE) {
2106 fprintf(stderr, "Internal error: directory cache info used in cg_read\n");
2107 return -EIO;
2108 }
2109
2110 if (offset)
2111 return 0;
2112
2113 if (!fc)
2114 return -EIO;
2115
2116 if (!f->controller)
2117 return -EINVAL;
2118
2119 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
2120 return -EINVAL;
2121 }
2122 free_key(k);
2123
2124
2125 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { // should never get here
2126 ret = -EACCES;
2127 goto out;
2128 }
2129
2130 if (strcmp(f->file, "tasks") == 0 ||
2131 strcmp(f->file, "/tasks") == 0 ||
2132 strcmp(f->file, "/cgroup.procs") == 0 ||
2133 strcmp(f->file, "cgroup.procs") == 0)
2134 // special case - we have to translate the pids
2135 r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data);
2136 else
2137 r = cgfs_get_value(f->controller, f->cgroup, f->file, &data);
2138
2139 if (!r) {
2140 ret = -EINVAL;
2141 goto out;
2142 }
2143
2144 if (!data) {
2145 ret = 0;
2146 goto out;
2147 }
2148 s = strlen(data);
2149 if (s > size)
2150 s = size;
2151 memcpy(buf, data, s);
2152 if (s > 0 && s < size && data[s-1] != '\n')
2153 buf[s++] = '\n';
2154
2155 ret = s;
2156
2157out:
2158 free(data);
2159 return ret;
2160}
2161
2162static void pid_from_ns(int sock, pid_t tpid)
2163{
2164 pid_t vpid;
2165 struct ucred cred;
2166 char v;
2167 int ret;
2168
2169 cred.uid = 0;
2170 cred.gid = 0;
2171 while (1) {
2172 if (!wait_for_sock(sock, 2)) {
2173 fprintf(stderr, "%s: timeout reading from parent\n", __func__);
2174 _exit(1);
2175 }
2176 if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) {
2177 fprintf(stderr, "%s: bad read from parent: %s\n",
2178 __func__, strerror(errno));
2179 _exit(1);
2180 }
2181 if (vpid == -1) // done
2182 break;
2183 v = '0';
2184 cred.pid = vpid;
2185 if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) {
2186 v = '1';
2187 cred.pid = getpid();
2188 if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK)
2189 _exit(1);
2190 }
2191 }
2192 _exit(0);
2193}
2194
/*
 * Enter the pid namespace of @tpid and fork the child that runs
 * pid_from_ns() on @sock.  (After setns() only children forked afterwards
 * are actually inside the target pid namespace.)
 *
 * The child acknowledges its start by writing '1' to a pipe.  If that byte
 * does not arrive within one second, the child is killed, reaped and a new
 * one is forked; this is retried without limit.  Once a child has
 * acknowledged, the wrapper waits for it and exits with status 0 if
 * wait_for_pid() succeeds, 1 otherwise.  Setup failures exit with status 1.
 * This function never returns.
 */
static void pid_from_ns_wrapper(int sock, pid_t tpid)
{
	char nspath[100];
	char ack;
	int nsfd = -1, len, nread, cpipe[2];
	pid_t child;

	len = snprintf(nspath, sizeof(nspath), "/proc/%d/ns/pid", tpid);
	if (len < 0 || (size_t)len >= sizeof(nspath))
		_exit(1);

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0)
		_exit(1);
	if (setns(nsfd, 0) < 0)
		_exit(1);
	close(nsfd);

	if (pipe(cpipe) < 0)
		_exit(1);

	for (;;) {
		child = fork();
		if (child < 0)
			_exit(1);

		if (child == 0) {
			char b = '1';

			close(cpipe[0]);
			if (write(cpipe[1], &b, sizeof(char)) < 0) {
				fprintf(stderr, "%s (child): erorr on write: %s\n",
					__func__, strerror(errno));
			}
			close(cpipe[1]);
			pid_from_ns(sock, tpid);
		}

		// give the child 1 second to be done forking and
		// write its ack
		if (wait_for_sock(cpipe[0], 1)) {
			nread = read(cpipe[0], &ack, 1);
			if (nread == sizeof(char) && ack == '1')
				_exit(wait_for_pid(child) ? 0 : 1);
		}

		/* no valid ack: get rid of this child and try again */
		kill(child, SIGKILL);
		wait_for_pid(child);
	}
}
2250
2251/*
2252 * Given host @uid, return the uid to which it maps in
2253 * @pid's user namespace, or -1 if none.
2254 */
2255bool hostuid_to_ns(uid_t uid, pid_t pid, uid_t *answer)
2256{
2257 FILE *f;
2258 char line[400];
2259
2260 sprintf(line, "/proc/%d/uid_map", pid);
2261 if ((f = fopen(line, "r")) == NULL) {
2262 return false;
2263 }
2264
2265 *answer = convert_id_to_ns(f, uid);
2266 fclose(f);
2267
2268 if (*answer == -1)
2269 return false;
2270 return true;
2271}
2272
2273/*
2274 * get_pid_creds: get the real uid and gid of @pid from
2275 * /proc/$$/status
2276 * (XXX should we use euid here?)
2277 */
2278void get_pid_creds(pid_t pid, uid_t *uid, gid_t *gid)
2279{
2280 char line[400];
2281 uid_t u;
2282 gid_t g;
2283 FILE *f;
2284
2285 *uid = -1;
2286 *gid = -1;
2287 sprintf(line, "/proc/%d/status", pid);
2288 if ((f = fopen(line, "r")) == NULL) {
2289 fprintf(stderr, "Error opening %s: %s\n", line, strerror(errno));
2290 return;
2291 }
2292 while (fgets(line, 400, f)) {
2293 if (strncmp(line, "Uid:", 4) == 0) {
2294 if (sscanf(line+4, "%u", &u) != 1) {
2295 fprintf(stderr, "bad uid line for pid %u\n", pid);
2296 fclose(f);
2297 return;
2298 }
2299 *uid = u;
2300 } else if (strncmp(line, "Gid:", 4) == 0) {
2301 if (sscanf(line+4, "%u", &g) != 1) {
2302 fprintf(stderr, "bad gid line for pid %u\n", pid);
2303 fclose(f);
2304 return;
2305 }
2306 *gid = g;
2307 }
2308 }
2309 fclose(f);
2310}
2311
2312/*
2313 * May the requestor @r move victim @v to a new cgroup?
2314 * This is allowed if
2315 * . they are the same task
2316 * . they are ownedy by the same uid
2317 * . @r is root on the host, or
2318 * . @v's uid is mapped into @r's where @r is root.
2319 */
2320bool may_move_pid(pid_t r, uid_t r_uid, pid_t v)
2321{
2322 uid_t v_uid, tmpuid;
2323 gid_t v_gid;
2324
2325 if (r == v)
2326 return true;
2327 if (r_uid == 0)
2328 return true;
2329 get_pid_creds(v, &v_uid, &v_gid);
2330 if (r_uid == v_uid)
2331 return true;
2332 if (hostuid_to_ns(r_uid, r, &tmpuid) && tmpuid == 0
2333 && hostuid_to_ns(v_uid, r, &tmpuid))
2334 return true;
2335 return false;
2336}
2337
2338static bool do_write_pids(pid_t tpid, uid_t tuid, const char *contrl, const char *cg,
2339 const char *file, const char *buf)
2340{
2341 int sock[2] = {-1, -1};
2342 pid_t qpid, cpid = -1;
2343 FILE *pids_file = NULL;
2344 bool answer = false, fail = false;
2345
2346 pids_file = open_pids_file(contrl, cg);
2347 if (!pids_file)
2348 return false;
2349
2350 /*
2351 * write the pids to a socket, have helper in writer's pidns
2352 * call movepid for us
2353 */
2354 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
2355 perror("socketpair");
2356 goto out;
2357 }
2358
2359 cpid = fork();
2360 if (cpid == -1)
2361 goto out;
2362
2363 if (!cpid) { // child
2364 fclose(pids_file);
2365 pid_from_ns_wrapper(sock[1], tpid);
2366 }
2367
2368 const char *ptr = buf;
2369 while (sscanf(ptr, "%d", &qpid) == 1) {
2370 struct ucred cred;
2371 char v;
2372
2373 if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
2374 fprintf(stderr, "%s: error writing pid to child: %s\n",
2375 __func__, strerror(errno));
2376 goto out;
2377 }
2378
2379 if (recv_creds(sock[0], &cred, &v)) {
2380 if (v == '0') {
2381 if (!may_move_pid(tpid, tuid, cred.pid)) {
2382 fail = true;
2383 break;
2384 }
2385 if (fprintf(pids_file, "%d", (int) cred.pid) < 0)
2386 fail = true;
2387 }
2388 }
2389
2390 ptr = strchr(ptr, '\n');
2391 if (!ptr)
2392 break;
2393 ptr++;
2394 }
2395
2396 /* All good, write the value */
2397 qpid = -1;
2398 if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
2399 fprintf(stderr, "Warning: failed to ask child to exit\n");
2400
2401 if (!fail)
2402 answer = true;
2403
2404out:
2405 if (cpid != -1)
2406 wait_for_pid(cpid);
2407 if (sock[0] != -1) {
2408 close(sock[0]);
2409 close(sock[1]);
2410 }
2411 if (pids_file) {
2412 if (fclose(pids_file) != 0)
2413 answer = false;
2414 }
2415 return answer;
2416}
2417
2418int cg_write(const char *path, const char *buf, size_t size, off_t offset,
2419 struct fuse_file_info *fi)
2420{
2421 struct fuse_context *fc = fuse_get_context();
2422 char *localbuf = NULL;
2423 struct cgfs_files *k = NULL;
2424 struct file_info *f = (struct file_info *)fi->fh;
2425 bool r;
2426
2427 if (f->type != LXC_TYPE_CGFILE) {
2428 fprintf(stderr, "Internal error: directory cache info used in cg_write\n");
2429 return -EIO;
2430 }
2431
2432 if (offset)
2433 return 0;
2434
2435 if (!fc)
2436 return -EIO;
2437
2438 localbuf = alloca(size+1);
2439 localbuf[size] = '\0';
2440 memcpy(localbuf, buf, size);
2441
2442 if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) {
2443 size = -EINVAL;
2444 goto out;
2445 }
2446
2447 if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) {
2448 size = -EACCES;
2449 goto out;
2450 }
2451
2452 if (strcmp(f->file, "tasks") == 0 ||
2453 strcmp(f->file, "/tasks") == 0 ||
2454 strcmp(f->file, "/cgroup.procs") == 0 ||
2455 strcmp(f->file, "cgroup.procs") == 0)
2456 // special case - we have to translate the pids
2457 r = do_write_pids(fc->pid, fc->uid, f->controller, f->cgroup, f->file, localbuf);
2458 else
2459 r = cgfs_set_value(f->controller, f->cgroup, f->file, localbuf);
2460
2461 if (!r)
2462 size = -EINVAL;
2463
2464out:
2465 free_key(k);
2466 return size;
2467}
2468
2469int cg_chown(const char *path, uid_t uid, gid_t gid)
2470{
2471 struct fuse_context *fc = fuse_get_context();
2472 char *cgdir = NULL, *last = NULL, *path1, *path2, *controller;
2473 struct cgfs_files *k = NULL;
2474 const char *cgroup;
2475 int ret;
2476
2477 if (!fc)
2478 return -EIO;
2479
2480 if (strcmp(path, "/cgroup") == 0)
2481 return -EINVAL;
2482
2483 controller = pick_controller_from_path(fc, path);
2484 if (!controller)
2485 return -EINVAL;
2486 cgroup = find_cgroup_in_path(path);
2487 if (!cgroup)
2488 /* this is just /cgroup/controller */
2489 return -EINVAL;
2490
2491 get_cgdir_and_path(cgroup, &cgdir, &last);
2492
2493 if (!last) {
2494 path1 = "/";
2495 path2 = cgdir;
2496 } else {
2497 path1 = cgdir;
2498 path2 = last;
2499 }
2500
2501 if (is_child_cgroup(controller, path1, path2)) {
2502 // get uid, gid, from '/tasks' file and make up a mode
2503 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
2504 k = cgfs_get_key(controller, cgroup, "tasks");
2505
2506 } else
2507 k = cgfs_get_key(controller, path1, path2);
2508
2509 if (!k) {
2510 ret = -EINVAL;
2511 goto out;
2512 }
2513
2514 /*
2515 * This being a fuse request, the uid and gid must be valid
2516 * in the caller's namespace. So we can just check to make
2517 * sure that the caller is root in his uid, and privileged
2518 * over the file's current owner.
2519 */
2520 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) {
2521 ret = -EACCES;
2522 goto out;
2523 }
2524
2525 ret = cgfs_chown_file(controller, cgroup, uid, gid);
2526
2527out:
2528 free_key(k);
2529 free(cgdir);
2530
2531 return ret;
2532}
2533
2534int cg_chmod(const char *path, mode_t mode)
2535{
2536 struct fuse_context *fc = fuse_get_context();
2537 char * cgdir = NULL, *last = NULL, *path1, *path2, *controller;
2538 struct cgfs_files *k = NULL;
2539 const char *cgroup;
2540 int ret;
2541
2542 if (!fc)
2543 return -EIO;
2544
2545 if (strcmp(path, "/cgroup") == 0)
2546 return -EINVAL;
2547
2548 controller = pick_controller_from_path(fc, path);
2549 if (!controller)
2550 return -EINVAL;
2551 cgroup = find_cgroup_in_path(path);
2552 if (!cgroup)
2553 /* this is just /cgroup/controller */
2554 return -EINVAL;
2555
2556 get_cgdir_and_path(cgroup, &cgdir, &last);
2557
2558 if (!last) {
2559 path1 = "/";
2560 path2 = cgdir;
2561 } else {
2562 path1 = cgdir;
2563 path2 = last;
2564 }
2565
2566 if (is_child_cgroup(controller, path1, path2)) {
2567 // get uid, gid, from '/tasks' file and make up a mode
2568 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
2569 k = cgfs_get_key(controller, cgroup, "tasks");
2570
2571 } else
2572 k = cgfs_get_key(controller, path1, path2);
2573
2574 if (!k) {
2575 ret = -EINVAL;
2576 goto out;
2577 }
2578
2579 /*
2580 * This being a fuse request, the uid and gid must be valid
2581 * in the caller's namespace. So we can just check to make
2582 * sure that the caller is root in his uid, and privileged
2583 * over the file's current owner.
2584 */
2585 if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) {
2586 ret = -EPERM;
2587 goto out;
2588 }
2589
2590 if (!cgfs_chmod_file(controller, cgroup, mode)) {
2591 ret = -EINVAL;
2592 goto out;
2593 }
2594
2595 ret = 0;
2596out:
2597 free_key(k);
2598 free(cgdir);
2599 return ret;
2600}
2601
2602int cg_mkdir(const char *path, mode_t mode)
2603{
2604 struct fuse_context *fc = fuse_get_context();
2605 char *last = NULL, *path1, *cgdir = NULL, *controller, *next = NULL;
2606 const char *cgroup;
2607 int ret;
2608
2609 if (!fc)
2610 return -EIO;
2611
2612
2613 controller = pick_controller_from_path(fc, path);
2614 if (!controller)
2615 return -EINVAL;
2616
2617 cgroup = find_cgroup_in_path(path);
2618 if (!cgroup)
2619 return -EINVAL;
2620
2621 get_cgdir_and_path(cgroup, &cgdir, &last);
2622 if (!last)
2623 path1 = "/";
2624 else
2625 path1 = cgdir;
2626
2627 pid_t initpid = lookup_initpid_in_store(fc->pid);
2628 if (initpid <= 0)
2629 initpid = fc->pid;
2630 if (!caller_is_in_ancestor(initpid, controller, path1, &next)) {
2631 if (!next)
2632 ret = -EINVAL;
2633 else if (last && strcmp(next, last) == 0)
2634 ret = -EEXIST;
2635 else
2636 ret = -ENOENT;
2637 goto out;
2638 }
2639
2640 if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) {
2641 ret = -EACCES;
2642 goto out;
2643 }
2644 if (!caller_is_in_ancestor(initpid, controller, path1, NULL)) {
2645 ret = -EACCES;
2646 goto out;
2647 }
2648
2649 ret = cgfs_create(controller, cgroup, fc->uid, fc->gid);
2650
2651out:
2652 free(cgdir);
2653 free(next);
2654 return ret;
2655}
2656
2657int cg_rmdir(const char *path)
2658{
2659 struct fuse_context *fc = fuse_get_context();
2660 char *last = NULL, *cgdir = NULL, *controller, *next = NULL;
2661 const char *cgroup;
2662 int ret;
2663
2664 if (!fc)
2665 return -EIO;
2666
2667 controller = pick_controller_from_path(fc, path);
2668 if (!controller)
2669 return -EINVAL;
2670
2671 cgroup = find_cgroup_in_path(path);
2672 if (!cgroup)
2673 return -EINVAL;
2674
2675 get_cgdir_and_path(cgroup, &cgdir, &last);
2676 if (!last) {
2677 ret = -EINVAL;
2678 goto out;
2679 }
2680
2681 pid_t initpid = lookup_initpid_in_store(fc->pid);
2682 if (initpid <= 0)
2683 initpid = fc->pid;
2684 if (!caller_is_in_ancestor(initpid, controller, cgroup, &next)) {
2685 if (!last || strcmp(next, last) == 0)
2686 ret = -EBUSY;
2687 else
2688 ret = -ENOENT;
2689 goto out;
2690 }
2691
2692 if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) {
2693 ret = -EACCES;
2694 goto out;
2695 }
2696 if (!caller_is_in_ancestor(initpid, controller, cgroup, NULL)) {
2697 ret = -EACCES;
2698 goto out;
2699 }
2700
2701 if (!cgfs_remove(controller, cgroup)) {
2702 ret = -EINVAL;
2703 goto out;
2704 }
2705
2706 ret = 0;
2707
2708out:
2709 free(cgdir);
2710 free(next);
2711 return ret;
2712}
2713
/* Return true when @line begins with the string @pref. */
static bool startswith(const char *line, const char *pref)
{
	size_t preflen = strlen(pref);

	return strncmp(line, pref, preflen) == 0;
}
2720
/*
 * Scan the newline-separated text in @memstat for the first line starting
 * with "total_cache" and store the number following it, divided by 1024,
 * in *@v.  *@v is 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	char *cursor = memstat;

	*v = 0;
	for (;;) {
		char *newline;

		if (*cursor == '\0')
			break;

		if (startswith(cursor, "total_cache")) {
			/* 11 == strlen("total_cache") */
			sscanf(cursor + 11, "%lu", v);
			*v = *v / 1024;
			break;
		}

		newline = strchr(cursor, '\n');
		if (newline == NULL)
			break;
		cursor = newline + 1;
	}
}
2738
/*
 * Look up one counter in a newline-separated blkio listing.
 *
 * The first line of @str that starts with "<major>:<minor> <iotype>" is
 * located and the number following that prefix is stored in *@v.
 * *@v is 0 when no line matches.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32] = { 0 };
	char *cursor = str;
	size_t keylen;

	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;
	for (;;) {
		char *newline;

		if (*cursor == '\0')
			break;

		if (startswith(cursor, key)) {
			sscanf(cursor + keylen, "%lu", v);
			break;
		}

		newline = strchr(cursor, '\n');
		if (newline == NULL)
			break;
		cursor = newline + 1;
	}
}
2761
2762static int read_file(const char *path, char *buf, size_t size,
2763 struct file_info *d)
2764{
2765 size_t linelen = 0, total_len = 0, rv = 0;
2766 char *line = NULL;
2767 char *cache = d->buf;
2768 size_t cache_size = d->buflen;
2769 FILE *f = fopen(path, "r");
2770 if (!f)
2771 return 0;
2772
2773 while (getline(&line, &linelen, f) != -1) {
2774 size_t l = snprintf(cache, cache_size, "%s", line);
2775 if (l < 0) {
2776 perror("Error writing to cache");
2777 rv = 0;
2778 goto err;
2779 }
2780 if (l >= cache_size) {
2781 fprintf(stderr, "Internal error: truncated write to cache\n");
2782 rv = 0;
2783 goto err;
2784 }
2785 cache += l;
2786 cache_size -= l;
2787 total_len += l;
2788 }
2789
2790 d->size = total_len;
2791 if (total_len > size ) total_len = size;
2792
2793 /* read from off 0 */
2794 memcpy(buf, d->buf, total_len);
2795 rv = total_len;
2796 err:
2797 fclose(f);
2798 free(line);
2799 return rv;
2800}
2801
2802/*
2803 * FUSE ops for /proc
2804 */
2805
/*
 * Read memory.limit_in_bytes of @cgroup from the memory controller.
 * Returns the parsed value, or (unsigned long)-1 when it cannot be read.
 */
static unsigned long get_memlimit(const char *cgroup)
{
	unsigned long limit = -1;
	char *value = NULL;

	if (cgfs_get_value("memory", cgroup, "memory.limit_in_bytes", &value)) {
		limit = strtoul(value, NULL, 10);
	}
	free(value);

	return limit;
}
2818
/*
 * Return the smallest memory limit found on @cgroup and all of its
 * ancestors, walking upwards with dirname().  Ancestors whose limit cannot
 * be read (get_memlimit() returns -1) are ignored.
 */
static unsigned long get_min_memlimit(const char *cgroup)
{
	/* dirname() may modify its argument, so work on a stack copy */
	char *copy = strdupa(cgroup);
	unsigned long memlimit = 0, retlimit;

	retlimit = get_memlimit(copy);

	/*
	 * dirname() converges on "/" for absolute paths but on "." for
	 * relative or empty ones; stop on either so the walk always ends.
	 */
	while (strcmp(copy, "/") != 0 && strcmp(copy, ".") != 0) {
		copy = dirname(copy);
		memlimit = get_memlimit(copy);
		if (memlimit != (unsigned long)-1 && memlimit < retlimit)
			retlimit = memlimit;
	}

	return retlimit;
}
2835
2836static int proc_meminfo_read(char *buf, size_t size, off_t offset,
2837 struct fuse_file_info *fi)
2838{
2839 struct fuse_context *fc = fuse_get_context();
2840 struct file_info *d = (struct file_info *)fi->fh;
2841 char *cg;
2842 char *memusage_str = NULL, *memstat_str = NULL,
2843 *memswlimit_str = NULL, *memswusage_str = NULL,
2844 *memswlimit_default_str = NULL, *memswusage_default_str = NULL;
2845 unsigned long memlimit = 0, memusage = 0, memswlimit = 0, memswusage = 0,
2846 cached = 0, hosttotal = 0;
2847 char *line = NULL;
2848 size_t linelen = 0, total_len = 0, rv = 0;
2849 char *cache = d->buf;
2850 size_t cache_size = d->buflen;
2851 FILE *f = NULL;
2852
2853 if (offset){
2854 if (offset > d->size)
2855 return -EINVAL;
2856 if (!d->cached)
2857 return 0;
2858 int left = d->size - offset;
2859 total_len = left > size ? size: left;
2860 memcpy(buf, cache + offset, total_len);
2861 return total_len;
2862 }
2863
2864 pid_t initpid = lookup_initpid_in_store(fc->pid);
2865 if (initpid <= 0)
2866 initpid = fc->pid;
2867 cg = get_pid_cgroup(initpid, "memory");
2868 if (!cg)
2869 return read_file("/proc/meminfo", buf, size, d);
2870
2871 memlimit = get_min_memlimit(cg);
2872 if (!cgfs_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
2873 goto err;
2874 if (!cgfs_get_value("memory", cg, "memory.stat", &memstat_str))
2875 goto err;
2876
2877 // Following values are allowed to fail, because swapaccount might be turned
2878 // off for current kernel
2879 if(cgfs_get_value("memory", cg, "memory.memsw.limit_in_bytes", &memswlimit_str) &&
2880 cgfs_get_value("memory", cg, "memory.memsw.usage_in_bytes", &memswusage_str))
2881 {
2882 /* If swapaccounting is turned on, then default value is assumed to be that of cgroup / */
2883 if (!cgfs_get_value("memory", "/", "memory.memsw.limit_in_bytes", &memswlimit_default_str))
2884 goto err;
2885 if (!cgfs_get_value("memory", "/", "memory.memsw.usage_in_bytes", &memswusage_default_str))
2886 goto err;
2887
2888 memswlimit = strtoul(memswlimit_str, NULL, 10);
2889 memswusage = strtoul(memswusage_str, NULL, 10);
2890
2891 if (!strcmp(memswlimit_str, memswlimit_default_str))
2892 memswlimit = 0;
2893 if (!strcmp(memswusage_str, memswusage_default_str))
2894 memswusage = 0;
2895
2896 memswlimit = memswlimit / 1024;
2897 memswusage = memswusage / 1024;
2898 }
2899
2900 memusage = strtoul(memusage_str, NULL, 10);
2901 memlimit /= 1024;
2902 memusage /= 1024;
2903
2904 get_mem_cached(memstat_str, &cached);
2905
2906 f = fopen("/proc/meminfo", "r");
2907 if (!f)
2908 goto err;
2909
2910 while (getline(&line, &linelen, f) != -1) {
2911 size_t l;
2912 char *printme, lbuf[100];
2913
2914 memset(lbuf, 0, 100);
2915 if (startswith(line, "MemTotal:")) {
2916 sscanf(line+14, "%lu", &hosttotal);
2917 if (hosttotal < memlimit)
2918 memlimit = hosttotal;
2919 snprintf(lbuf, 100, "MemTotal: %8lu kB\n", memlimit);
2920 printme = lbuf;
2921 } else if (startswith(line, "MemFree:")) {
2922 snprintf(lbuf, 100, "MemFree: %8lu kB\n", memlimit - memusage);
2923 printme = lbuf;
2924 } else if (startswith(line, "MemAvailable:")) {
2925 snprintf(lbuf, 100, "MemAvailable: %8lu kB\n", memlimit - memusage);
2926 printme = lbuf;
2927 } else if (startswith(line, "SwapTotal:") && memswlimit > 0) {
2928 snprintf(lbuf, 100, "SwapTotal: %8lu kB\n", memswlimit - memlimit);
2929 printme = lbuf;
2930 } else if (startswith(line, "SwapFree:") && memswlimit > 0 && memswusage > 0) {
2931 snprintf(lbuf, 100, "SwapFree: %8lu kB\n",
2932 (memswlimit - memlimit) - (memswusage - memusage));
2933 printme = lbuf;
2934 } else if (startswith(line, "Buffers:")) {
2935 snprintf(lbuf, 100, "Buffers: %8lu kB\n", 0UL);
2936 printme = lbuf;
2937 } else if (startswith(line, "Cached:")) {
2938 snprintf(lbuf, 100, "Cached: %8lu kB\n", cached);
2939 printme = lbuf;
2940 } else if (startswith(line, "SwapCached:")) {
2941 snprintf(lbuf, 100, "SwapCached: %8lu kB\n", 0UL);
2942 printme = lbuf;
2943 } else
2944 printme = line;
2945
2946 l = snprintf(cache, cache_size, "%s", printme);
2947 if (l < 0) {
2948 perror("Error writing to cache");
2949 rv = 0;
2950 goto err;
2951
2952 }
2953 if (l >= cache_size) {
2954 fprintf(stderr, "Internal error: truncated write to cache\n");
2955 rv = 0;
2956 goto err;
2957 }
2958
2959 cache += l;
2960 cache_size -= l;
2961 total_len += l;
2962 }
2963
2964 d->cached = 1;
2965 d->size = total_len;
2966 if (total_len > size ) total_len = size;
2967 memcpy(buf, d->buf, total_len);
2968
2969 rv = total_len;
2970err:
2971 if (f)
2972 fclose(f);
2973 free(line);
2974 free(cg);
2975 free(memusage_str);
2976 free(memswlimit_str);
2977 free(memswusage_str);
2978 free(memstat_str);
2979 free(memswlimit_default_str);
2980 free(memswusage_default_str);
2981 return rv;
2982}
2983
/*
 * Fetch cpuset.cpus for the cgroup @cg.
 * The result is a newly allocated string which the caller must free;
 * NULL is returned when the value cannot be read.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgfs_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
2996
bool cpu_in_cpuset(int cpu, const char *cpuset);

/*
 * Parse the cpu number out of a "processor : N" line and report whether
 * that cpu is part of @cpuset.  Lines that do not parse yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpunr;
	int parsed = sscanf(line, "processor : %d", &cpunr);

	return parsed == 1 ? cpu_in_cpuset(cpunr, cpuset) : false;
}
3007
/*
 * Check whether @line is a "processor : N" line as found in /proc/cpuinfo,
 * i.e. whether a cpu number can be parsed out of it.
 */
static bool is_processor_line(const char *line)
{
	int ignored;

	return sscanf(line, "processor : %d", &ignored) == 1;
}
3019
3020static int proc_cpuinfo_read(char *buf, size_t size, off_t offset,
3021 struct fuse_file_info *fi)
3022{
3023 struct fuse_context *fc = fuse_get_context();
3024 struct file_info *d = (struct file_info *)fi->fh;
3025 char *cg;
3026 char *cpuset = NULL;
3027 char *line = NULL;
3028 size_t linelen = 0, total_len = 0, rv = 0;
3029 bool am_printing = false;
3030 int curcpu = -1;
3031 char *cache = d->buf;
3032 size_t cache_size = d->buflen;
3033 FILE *f = NULL;
3034
3035 if (offset){
3036 if (offset > d->size)
3037 return -EINVAL;
3038 if (!d->cached)
3039 return 0;
3040 int left = d->size - offset;
3041 total_len = left > size ? size: left;
3042 memcpy(buf, cache + offset, total_len);
3043 return total_len;
3044 }
3045
3046 pid_t initpid = lookup_initpid_in_store(fc->pid);
3047 if (initpid <= 0)
3048 initpid = fc->pid;
3049 cg = get_pid_cgroup(initpid, "cpuset");
3050 if (!cg)
3051 return read_file("proc/cpuinfo", buf, size, d);
3052
3053 cpuset = get_cpuset(cg);
3054 if (!cpuset)
3055 goto err;
3056
3057 f = fopen("/proc/cpuinfo", "r");
3058 if (!f)
3059 goto err;
3060
3061 while (getline(&line, &linelen, f) != -1) {
3062 size_t l;
3063 if (is_processor_line(line)) {
3064 am_printing = cpuline_in_cpuset(line, cpuset);
3065 if (am_printing) {
3066 curcpu ++;
3067 l = snprintf(cache, cache_size, "processor : %d\n", curcpu);
3068 if (l < 0) {
3069 perror("Error writing to cache");
3070 rv = 0;
3071 goto err;
3072 }
3073 if (l >= cache_size) {
3074 fprintf(stderr, "Internal error: truncated write to cache\n");
3075 rv = 0;
3076 goto err;
3077 }
3078 cache += l;
3079 cache_size -= l;
3080 total_len += l;
3081 }
3082 continue;
3083 }
3084 if (am_printing) {
3085 l = snprintf(cache, cache_size, "%s", line);
3086 if (l < 0) {
3087 perror("Error writing to cache");
3088 rv = 0;
3089 goto err;
3090 }
3091 if (l >= cache_size) {
3092 fprintf(stderr, "Internal error: truncated write to cache\n");
3093 rv = 0;
3094 goto err;
3095 }
3096 cache += l;
3097 cache_size -= l;
3098 total_len += l;
3099 }
3100 }
3101
3102 d->cached = 1;
3103 d->size = total_len;
3104 if (total_len > size ) total_len = size;
3105
3106 /* read from off 0 */
3107 memcpy(buf, d->buf, total_len);
3108 rv = total_len;
3109err:
3110 if (f)
3111 fclose(f);
3112 free(line);
3113 free(cpuset);
3114 free(cg);
3115 return rv;
3116}
3117
3118static int proc_stat_read(char *buf, size_t size, off_t offset,
3119 struct fuse_file_info *fi)
3120{
3121 struct fuse_context *fc = fuse_get_context();
3122 struct file_info *d = (struct file_info *)fi->fh;
3123 char *cg;
3124 char *cpuset = NULL;
3125 char *line = NULL;
3126 size_t linelen = 0, total_len = 0, rv = 0;
3127 int curcpu = -1; /* cpu numbering starts at 0 */
3128 unsigned long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0;
3129 unsigned long user_sum = 0, nice_sum = 0, system_sum = 0, idle_sum = 0, iowait_sum = 0,
3130 irq_sum = 0, softirq_sum = 0, steal_sum = 0, guest_sum = 0;
3131#define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
3132 char cpuall[CPUALL_MAX_SIZE];
3133 /* reserve for cpu all */
3134 char *cache = d->buf + CPUALL_MAX_SIZE;
3135 size_t cache_size = d->buflen - CPUALL_MAX_SIZE;
3136 FILE *f = NULL;
3137
3138 if (offset){
3139 if (offset > d->size)
3140 return -EINVAL;
3141 if (!d->cached)
3142 return 0;
3143 int left = d->size - offset;
3144 total_len = left > size ? size: left;
3145 memcpy(buf, d->buf + offset, total_len);
3146 return total_len;
3147 }
3148
3149 pid_t initpid = lookup_initpid_in_store(fc->pid);
3150 if (initpid <= 0)
3151 initpid = fc->pid;
3152 cg = get_pid_cgroup(initpid, "cpuset");
3153 if (!cg)
3154 return read_file("/proc/stat", buf, size, d);
3155
3156 cpuset = get_cpuset(cg);
3157 if (!cpuset)
3158 goto err;
3159
3160 f = fopen("/proc/stat", "r");
3161 if (!f)
3162 goto err;
3163
3164 //skip first line
3165 if (getline(&line, &linelen, f) < 0) {
3166 fprintf(stderr, "proc_stat_read read first line failed\n");
3167 goto err;
3168 }
3169
3170 while (getline(&line, &linelen, f) != -1) {
3171 size_t l;
3172 int cpu;
3173 char cpu_char[10]; /* That's a lot of cores */
3174 char *c;
3175
3176 if (sscanf(line, "cpu%9[^ ]", cpu_char) != 1) {
3177 /* not a ^cpuN line containing a number N, just print it */
3178 l = snprintf(cache, cache_size, "%s", line);
3179 if (l < 0) {
3180 perror("Error writing to cache");
3181 rv = 0;
3182 goto err;
3183 }
3184 if (l >= cache_size) {
3185 fprintf(stderr, "Internal error: truncated write to cache\n");
3186 rv = 0;
3187 goto err;
3188 }
3189 cache += l;
3190 cache_size -= l;
3191 total_len += l;
3192 continue;
3193 }
3194
3195 if (sscanf(cpu_char, "%d", &cpu) != 1)
3196 continue;
3197 if (!cpu_in_cpuset(cpu, cpuset))
3198 continue;
3199 curcpu ++;
3200
3201 c = strchr(line, ' ');
3202 if (!c)
3203 continue;
3204 l = snprintf(cache, cache_size, "cpu%d%s", curcpu, c);
3205 if (l < 0) {
3206 perror("Error writing to cache");
3207 rv = 0;
3208 goto err;
3209
3210 }
3211 if (l >= cache_size) {
3212 fprintf(stderr, "Internal error: truncated write to cache\n");
3213 rv = 0;
3214 goto err;
3215 }
3216
3217 cache += l;
3218 cache_size -= l;
3219 total_len += l;
3220
3221 if (sscanf(line, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &system, &idle, &iowait, &irq,
3222 &softirq, &steal, &guest) != 9)
3223 continue;
3224 user_sum += user;
3225 nice_sum += nice;
3226 system_sum += system;
3227 idle_sum += idle;
3228 iowait_sum += iowait;
3229 irq_sum += irq;
3230 softirq_sum += softirq;
3231 steal_sum += steal;
3232 guest_sum += guest;
3233 }
3234
3235 cache = d->buf;
3236
3237 int cpuall_len = snprintf(cpuall, CPUALL_MAX_SIZE, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
3238 "cpu ", user_sum, nice_sum, system_sum, idle_sum, iowait_sum, irq_sum, softirq_sum, steal_sum, guest_sum);
3239 if (cpuall_len > 0 && cpuall_len < CPUALL_MAX_SIZE){
3240 memcpy(cache, cpuall, cpuall_len);
3241 cache += cpuall_len;
3242 } else{
3243 /* shouldn't happen */
3244 fprintf(stderr, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len);
3245 cpuall_len = 0;
3246 }
3247
3248 memmove(cache, d->buf + CPUALL_MAX_SIZE, total_len);
3249 total_len += cpuall_len;
3250 d->cached = 1;
3251 d->size = total_len;
3252 if (total_len > size ) total_len = size;
3253
3254 memcpy(buf, d->buf, total_len);
3255 rv = total_len;
3256
3257err:
3258 if (f)
3259 fclose(f);
3260 free(line);
3261 free(cpuset);
3262 free(cg);
3263 return rv;
3264}
3265
/*
 * Return the number of seconds between the st_ctime of /proc/<initpid> and
 * now, where <initpid> is what lookup_initpid_in_store() reports for @pid.
 * Returns 0 if the init pid is unknown or /proc/<initpid> cannot be
 * stat'ed.
 */
static long int getreaperage(pid_t pid)
{
	char procdir[100];
	struct stat st;
	int n;
	pid_t reaper = lookup_initpid_in_store(pid);

	if (reaper <= 0)
		return 0;

	n = snprintf(procdir, sizeof(procdir), "/proc/%d", reaper);
	if (n < 0 || n >= (int)sizeof(procdir))
		return 0;

	if (lstat(procdir, &st) < 0)
		return 0;

	return time(NULL) - st.st_ctime;
}
3286
/*
 * Return cpuacct.usage of the cpuacct cgroup of @task's init process,
 * divided by 10^9 (presumably nanoseconds to seconds; confirm against the
 * cgroup documentation).  Returns 0 if the init pid, its cgroup or the
 * value cannot be determined.
 */
static unsigned long get_reaper_busy(pid_t task)
{
	char *cg = NULL, *raw = NULL;
	unsigned long busy = 0;
	pid_t reaper = lookup_initpid_in_store(task);

	if (reaper <= 0)
		return 0;

	cg = get_pid_cgroup(reaper, "cpuacct");
	if (cg && cgfs_get_value("cpuacct", cg, "cpuacct.usage", &raw)) {
		busy = strtoul(raw, NULL, 10);
		busy = busy / 1000000000;
	}

	free(cg);
	free(raw);
	return busy;
}
3309
#if RELOADTEST
/*
 * Test hook: create an empty file named "iwashere" in the current working
 * directory.  Exits the process if the working directory cannot be
 * determined.
 */
void iwashere(void)
{
	char *cwd = get_current_dir_name();
	char *marker;
	size_t marker_len;
	int fd;

	if (cwd == NULL)
		exit(1);

	marker_len = strlen(cwd) + strlen("/iwashere") + 1;
	marker = alloca(marker_len);
	snprintf(marker, marker_len, "%s/iwashere", cwd);
	free(cwd);

	fd = creat(marker, 0755);
	if (fd < 0)
		return;
	close(fd);
}
#endif
3328
3329/*
3330 * We read /proc/uptime and reuse its second field.
3331 * For the first field, we use the mtime for the reaper for
3332 * the calling pid as returned by getreaperage
3333 */
3334static int proc_uptime_read(char *buf, size_t size, off_t offset,
3335 struct fuse_file_info *fi)
3336{
3337 struct fuse_context *fc = fuse_get_context();
3338 struct file_info *d = (struct file_info *)fi->fh;
3339 long int reaperage = getreaperage(fc->pid);
3340 unsigned long int busytime = get_reaper_busy(fc->pid), idletime;
3341 char *cache = d->buf;
3342 size_t total_len = 0;
3343
3344#if RELOADTEST
3345 iwashere();
3346#endif
3347
3348 if (offset){
3349 if (offset > d->size)
3350 return -EINVAL;
3351 if (!d->cached)
3352 return 0;
3353 int left = d->size - offset;
3354 total_len = left > size ? size: left;
3355 memcpy(buf, cache + offset, total_len);
3356 return total_len;
3357 }
3358
3359 idletime = reaperage - busytime;
3360 if (idletime > reaperage)
3361 idletime = reaperage;
3362
3363 total_len = snprintf(d->buf, d->size, "%ld.0 %lu.0\n", reaperage, idletime);
3364 if (total_len < 0){
3365 perror("Error writing to cache");
3366 return 0;
3367 }
3368
3369 d->size = (int)total_len;
3370 d->cached = 1;
3371
3372 if (total_len > size) total_len = size;
3373
3374 memcpy(buf, d->buf, total_len);
3375 return total_len;
3376}
3377
3378static int proc_diskstats_read(char *buf, size_t size, off_t offset,
3379 struct fuse_file_info *fi)
3380{
3381 char dev_name[72];
3382 struct fuse_context *fc = fuse_get_context();
3383 struct file_info *d = (struct file_info *)fi->fh;
3384 char *cg;
3385 char *io_serviced_str = NULL, *io_merged_str = NULL, *io_service_bytes_str = NULL,
3386 *io_wait_time_str = NULL, *io_service_time_str = NULL;
3387 unsigned long read = 0, write = 0;
3388 unsigned long read_merged = 0, write_merged = 0;
3389 unsigned long read_sectors = 0, write_sectors = 0;
3390 unsigned long read_ticks = 0, write_ticks = 0;
3391 unsigned long ios_pgr = 0, tot_ticks = 0, rq_ticks = 0;
3392 unsigned long rd_svctm = 0, wr_svctm = 0, rd_wait = 0, wr_wait = 0;
3393 char *cache = d->buf;
3394 size_t cache_size = d->buflen;
3395 char *line = NULL;
3396 size_t linelen = 0, total_len = 0, rv = 0;
3397 unsigned int major = 0, minor = 0;
3398 int i = 0;
3399 FILE *f = NULL;
3400
3401 if (offset){
3402 if (offset > d->size)
3403 return -EINVAL;
3404 if (!d->cached)
3405 return 0;
3406 int left = d->size - offset;
3407 total_len = left > size ? size: left;
3408 memcpy(buf, cache + offset, total_len);
3409 return total_len;
3410 }
3411
3412 pid_t initpid = lookup_initpid_in_store(fc->pid);
3413 if (initpid <= 0)
3414 initpid = fc->pid;
3415 cg = get_pid_cgroup(initpid, "blkio");
3416 if (!cg)
3417 return read_file("/proc/diskstats", buf, size, d);
3418
3419 if (!cgfs_get_value("blkio", cg, "blkio.io_serviced", &io_serviced_str))
3420 goto err;
3421 if (!cgfs_get_value("blkio", cg, "blkio.io_merged", &io_merged_str))
3422 goto err;
3423 if (!cgfs_get_value("blkio", cg, "blkio.io_service_bytes", &io_service_bytes_str))
3424 goto err;
3425 if (!cgfs_get_value("blkio", cg, "blkio.io_wait_time", &io_wait_time_str))
3426 goto err;
3427 if (!cgfs_get_value("blkio", cg, "blkio.io_service_time", &io_service_time_str))
3428 goto err;
3429
3430
3431 f = fopen("/proc/diskstats", "r");
3432 if (!f)
3433 goto err;
3434
3435 while (getline(&line, &linelen, f) != -1) {
3436 size_t l;
3437 char *printme, lbuf[256];
3438
3439 i = sscanf(line, "%u %u %71s", &major, &minor, dev_name);
3440 if(i == 3){
3441 get_blkio_io_value(io_serviced_str, major, minor, "Read", &read);
3442 get_blkio_io_value(io_serviced_str, major, minor, "Write", &write);
3443 get_blkio_io_value(io_merged_str, major, minor, "Read", &read_merged);
3444 get_blkio_io_value(io_merged_str, major, minor, "Write", &write_merged);
3445 get_blkio_io_value(io_service_bytes_str, major, minor, "Read", &read_sectors);
3446 read_sectors = read_sectors/512;
3447 get_blkio_io_value(io_service_bytes_str, major, minor, "Write", &write_sectors);
3448 write_sectors = write_sectors/512;
3449
3450 get_blkio_io_value(io_service_time_str, major, minor, "Read", &rd_svctm);
3451 rd_svctm = rd_svctm/1000000;
3452 get_blkio_io_value(io_wait_time_str, major, minor, "Read", &rd_wait);
3453 rd_wait = rd_wait/1000000;
3454 read_ticks = rd_svctm + rd_wait;
3455
3456 get_blkio_io_value(io_service_time_str, major, minor, "Write", &wr_svctm);
3457 wr_svctm = wr_svctm/1000000;
3458 get_blkio_io_value(io_wait_time_str, major, minor, "Write", &wr_wait);
3459 wr_wait = wr_wait/1000000;
3460 write_ticks = wr_svctm + wr_wait;
3461
3462 get_blkio_io_value(io_service_time_str, major, minor, "Total", &tot_ticks);
3463 tot_ticks = tot_ticks/1000000;
3464 }else{
3465 continue;
3466 }
3467
3468 memset(lbuf, 0, 256);
3469 if (read || write || read_merged || write_merged || read_sectors || write_sectors || read_ticks || write_ticks) {
3470 snprintf(lbuf, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
3471 major, minor, dev_name, read, read_merged, read_sectors, read_ticks,
3472 write, write_merged, write_sectors, write_ticks, ios_pgr, tot_ticks, rq_ticks);
3473 printme = lbuf;
3474 } else
3475 continue;
3476
3477 l = snprintf(cache, cache_size, "%s", printme);
3478 if (l < 0) {
3479 perror("Error writing to fuse buf");
3480 rv = 0;
3481 goto err;
3482 }
3483 if (l >= cache_size) {
3484 fprintf(stderr, "Internal error: truncated write to cache\n");
3485 rv = 0;
3486 goto err;
3487 }
3488 cache += l;
3489 cache_size -= l;
3490 total_len += l;
3491 }
3492
3493 d->cached = 1;
3494 d->size = total_len;
3495 if (total_len > size ) total_len = size;
3496 memcpy(buf, d->buf, total_len);
3497
3498 rv = total_len;
3499err:
3500 free(cg);
3501 if (f)
3502 fclose(f);
3503 free(line);
3504 free(io_serviced_str);
3505 free(io_merged_str);
3506 free(io_service_bytes_str);
3507 free(io_wait_time_str);
3508 free(io_service_time_str);
3509 return rv;
3510}
3511
70dcc12e
SH
3512static int proc_swaps_read(char *buf, size_t size, off_t offset,
3513 struct fuse_file_info *fi)
3514{
3515 struct fuse_context *fc = fuse_get_context();
3516 struct file_info *d = (struct file_info *)fi->fh;
3517 char *cg = NULL;
3518 char *memswlimit_str = NULL, *memlimit_str = NULL, *memusage_str = NULL, *memswusage_str = NULL,
3519 *memswlimit_default_str = NULL, *memswusage_default_str = NULL;
3520 unsigned long memswlimit = 0, memlimit = 0, memusage = 0, memswusage = 0, swap_total = 0, swap_free = 0;
3521 size_t total_len = 0, rv = 0;
3522 char *cache = d->buf;
3523
3524 if (offset) {
3525 if (offset > d->size)
3526 return -EINVAL;
3527 if (!d->cached)
3528 return 0;
3529 int left = d->size - offset;
3530 total_len = left > size ? size: left;
3531 memcpy(buf, cache + offset, total_len);
3532 return total_len;
3533 }
3534
3535 pid_t initpid = lookup_initpid_in_store(fc->pid);
3536 if (initpid <= 0)
3537 initpid = fc->pid;
3538 cg = get_pid_cgroup(initpid, "memory");
3539 if (!cg)
3540 return read_file("/proc/swaps", buf, size, d);
3541
3542 if (!cgfs_get_value("memory", cg, "memory.limit_in_bytes", &memlimit_str))
3543 goto err;
3544
3545 if (!cgfs_get_value("memory", cg, "memory.usage_in_bytes", &memusage_str))
3546 goto err;
3547
3548 memlimit = strtoul(memlimit_str, NULL, 10);
3549 memusage = strtoul(memusage_str, NULL, 10);
3550
3551 if (cgfs_get_value("memory", cg, "memory.memsw.usage_in_bytes", &memswusage_str) &&
3552 cgfs_get_value("memory", cg, "memory.memsw.limit_in_bytes", &memswlimit_str)) {
3553
3554 /* If swap accounting is turned on, then default value is assumed to be that of cgroup / */
3555 if (!cgfs_get_value("memory", "/", "memory.memsw.limit_in_bytes", &memswlimit_default_str))
3556 goto err;
3557 if (!cgfs_get_value("memory", "/", "memory.memsw.usage_in_bytes", &memswusage_default_str))
3558 goto err;
3559
3560 memswlimit = strtoul(memswlimit_str, NULL, 10);
3561 memswusage = strtoul(memswusage_str, NULL, 10);
3562
3563 if (!strcmp(memswlimit_str, memswlimit_default_str))
3564 memswlimit = 0;
3565 if (!strcmp(memswusage_str, memswusage_default_str))
3566 memswusage = 0;
3567
3568 swap_total = (memswlimit - memlimit) / 1024;
3569 swap_free = (memswusage - memusage) / 1024;
3570 }
3571
3572 total_len = snprintf(d->buf, d->size, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
3573
3574 /* When no mem + swap limit is specified or swapaccount=0*/
3575 if (!memswlimit) {
3576 char *line = NULL;
3577 size_t linelen = 0;
3578 FILE *f = fopen("/proc/meminfo", "r");
3579
3580 if (!f)
3581 goto err;
3582
3583 while (getline(&line, &linelen, f) != -1) {
3584 if (startswith(line, "SwapTotal:")) {
3585 sscanf(line, "SwapTotal: %8lu kB", &swap_total);
3586 } else if (startswith(line, "SwapFree:")) {
3587 sscanf(line, "SwapFree: %8lu kB", &swap_free);
3588 }
3589 }
3590
3591 free(line);
3592 fclose(f);
3593 }
3594
3595 if (swap_total > 0) {
42eba700 3596 total_len += snprintf(d->buf + total_len, d->size - total_len,
70dcc12e
SH
3597 "none%*svirtual\t\t%lu\t%lu\t0\n", 36, " ",
3598 swap_total, swap_free);
3599 }
3600
3601 if (total_len < 0) {
3602 perror("Error writing to cache");
3603 rv = 0;
3604 goto err;
3605 }
3606
3607 d->cached = 1;
3608 d->size = (int)total_len;
3609
3610 if (total_len > size) total_len = size;
3611 memcpy(buf, d->buf, total_len);
3612 rv = total_len;
3613
3614err:
3615 free(cg);
3616 free(memswlimit_str);
3617 free(memlimit_str);
3618 free(memusage_str);
3619 free(memswusage_str);
3620 free(memswusage_default_str);
3621 free(memswlimit_default_str);
3622 return rv;
3623}
3624
237e200e
SH
/*
 * Return the total number of bytes getline() yields when reading the file
 * @which to its end, or 0 if the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	char *line = NULL;
	size_t cap = 0;
	ssize_t total = 0;
	FILE *f = fopen(which, "r");

	if (f == NULL)
		return 0;

	for (;;) {
		ssize_t got = getline(&line, &cap, f);

		if (got == -1)
			break;
		total += got;
	}

	fclose(f);
	free(line);

	return total;
}
3641
3642int proc_getattr(const char *path, struct stat *sb)
3643{
3644 struct timespec now;
3645
3646 memset(sb, 0, sizeof(struct stat));
3647 if (clock_gettime(CLOCK_REALTIME, &now) < 0)
3648 return -EINVAL;
3649 sb->st_uid = sb->st_gid = 0;
3650 sb->st_atim = sb->st_mtim = sb->st_ctim = now;
3651 if (strcmp(path, "/proc") == 0) {
3652 sb->st_mode = S_IFDIR | 00555;
3653 sb->st_nlink = 2;
3654 return 0;
3655 }
3656 if (strcmp(path, "/proc/meminfo") == 0 ||
3657 strcmp(path, "/proc/cpuinfo") == 0 ||
3658 strcmp(path, "/proc/uptime") == 0 ||
3659 strcmp(path, "/proc/stat") == 0 ||
70dcc12e
SH
3660 strcmp(path, "/proc/diskstats") == 0 ||
3661 strcmp(path, "/proc/swaps") == 0) {
237e200e
SH
3662 sb->st_size = 0;
3663 sb->st_mode = S_IFREG | 00444;
3664 sb->st_nlink = 1;
3665 return 0;
3666 }
3667
3668 return -ENOENT;
3669}
3670
3671int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset,
3672 struct fuse_file_info *fi)
3673{
3674 if (filler(buf, "cpuinfo", NULL, 0) != 0 ||
3675 filler(buf, "meminfo", NULL, 0) != 0 ||
3676 filler(buf, "stat", NULL, 0) != 0 ||
3677 filler(buf, "uptime", NULL, 0) != 0 ||
70dcc12e
SH
3678 filler(buf, "diskstats", NULL, 0) != 0 ||
3679 filler(buf, "swaps", NULL, 0) != 0)
237e200e
SH
3680 return -EINVAL;
3681 return 0;
3682}
3683
3684int proc_open(const char *path, struct fuse_file_info *fi)
3685{
3686 int type = -1;
3687 struct file_info *info;
3688
3689 if (strcmp(path, "/proc/meminfo") == 0)
3690 type = LXC_TYPE_PROC_MEMINFO;
3691 else if (strcmp(path, "/proc/cpuinfo") == 0)
3692 type = LXC_TYPE_PROC_CPUINFO;
3693 else if (strcmp(path, "/proc/uptime") == 0)
3694 type = LXC_TYPE_PROC_UPTIME;
3695 else if (strcmp(path, "/proc/stat") == 0)
3696 type = LXC_TYPE_PROC_STAT;
3697 else if (strcmp(path, "/proc/diskstats") == 0)
3698 type = LXC_TYPE_PROC_DISKSTATS;
70dcc12e
SH
3699 else if (strcmp(path, "/proc/swaps") == 0)
3700 type = LXC_TYPE_PROC_SWAPS;
237e200e
SH
3701 if (type == -1)
3702 return -ENOENT;
3703
3704 info = malloc(sizeof(*info));
3705 if (!info)
3706 return -ENOMEM;
3707
3708 memset(info, 0, sizeof(*info));
3709 info->type = type;
3710
3711 info->buflen = get_procfile_size(path) + BUF_RESERVE_SIZE;
3712 do {
3713 info->buf = malloc(info->buflen);
3714 } while (!info->buf);
3715 memset(info->buf, 0, info->buflen);
3716 /* set actual size to buffer size */
3717 info->size = info->buflen;
3718
3719 fi->fh = (unsigned long)info;
3720 return 0;
3721}
3722
3723int proc_release(const char *path, struct fuse_file_info *fi)
3724{
3725 struct file_info *f = (struct file_info *)fi->fh;
3726
3727 do_release_file_info(f);
3728 return 0;
3729}
3730
3731int proc_read(const char *path, char *buf, size_t size, off_t offset,
3732 struct fuse_file_info *fi)
3733{
3734 struct file_info *f = (struct file_info *) fi->fh;
3735
3736 switch (f->type) {
3737 case LXC_TYPE_PROC_MEMINFO:
3738 return proc_meminfo_read(buf, size, offset, fi);
3739 case LXC_TYPE_PROC_CPUINFO:
3740 return proc_cpuinfo_read(buf, size, offset, fi);
3741 case LXC_TYPE_PROC_UPTIME:
3742 return proc_uptime_read(buf, size, offset, fi);
3743 case LXC_TYPE_PROC_STAT:
3744 return proc_stat_read(buf, size, offset, fi);
3745 case LXC_TYPE_PROC_DISKSTATS:
3746 return proc_diskstats_read(buf, size, offset, fi);
70dcc12e
SH
3747 case LXC_TYPE_PROC_SWAPS:
3748 return proc_swaps_read(buf, size, offset, fi);
237e200e
SH
3749 default:
3750 return -EINVAL;
3751 }
3752}
3753
/*
 * Constructor: parse /proc/self/cgroup and register every line with
 * store_hierarchy().  Each line is split at its first and last ':'; the
 * part before the first ':' and the part between the two are passed on.
 * Parsing stops at the first malformed line or store_hierarchy() failure.
 * print_subsystems() is only called when the whole file was processed.
 */
static void __attribute__((constructor)) collect_subsystems(void)
{
	char *line = NULL;
	size_t cap = 0;
	bool complete = false;
	FILE *f = fopen("/proc/self/cgroup", "r");

	if (f == NULL) {
		fprintf(stderr, "Error opening /proc/self/cgroup: %s\n", strerror(errno));
		return;
	}

	for (;;) {
		char *names, *tail;

		if (getline(&line, &cap, f) == -1) {
			complete = true;
			break;
		}

		names = strchr(line, ':');
		if (names == NULL)
			break;
		*names = '\0';
		names++;

		tail = strrchr(names, ':');
		if (tail == NULL)
			break;
		*tail = '\0';

		if (!store_hierarchy(line, names))
			break;
	}

	if (complete)
		print_subsystems();

	free(line);
	fclose(f);
}
3787
3788static void __attribute__((destructor)) free_subsystems(void)
3789{
3790 int i;
3791
3792 for (i = 0; i < num_hierarchies; i++)
3793 if (hierarchies[i])
3794 free(hierarchies[i]);
3795 free(hierarchies);
3796}