3 * Copyright © 2014-2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
6 * See COPYING file for details.
9 #define FUSE_USE_VERSION 26
24 #include <linux/sched.h>
25 #include <sys/socket.h>
26 #include <sys/mount.h>
27 #include <sys/epoll.h>
31 #define GLIB_DISABLE_DEPRECATION_WARNINGS
32 #include <glib-object.h>
36 #include "config.h" // for VERSION
41 LXC_TYPE_PROC_MEMINFO
,
42 LXC_TYPE_PROC_CPUINFO
,
45 LXC_TYPE_PROC_DISKSTATS
,
53 char *buf
; // unused as of yet
55 int size
; //actual data size
59 /* reserve buffer size, for cpuall in /proc/stat */
60 #define BUF_RESERVE_SIZE 256
63 * A table caching which pid is init for a pid namespace.
64 * When looking up which pid is init for $qpid, we first
65 * 1. Stat /proc/$qpid/ns/pid.
66 * 2. Check whether the ino_t is in our store.
67 * a. if not, fork a child in qpid's ns to send us
68 * ucred.pid = 1, and read the initpid. Cache
69 * initpid and creation time for /proc/initpid
70 * in a new store entry.
71 * b. if so, verify that /proc/initpid still matches
72 * what we have saved. If not, clear the store
73 * entry and go back to a. If so, return the
76 struct pidns_init_store
{
77 ino_t ino
; // inode number for /proc/$pid/ns/pid
78 pid_t initpid
; // the pid of init in that ns
79 long int ctime
; // the time at which /proc/$initpid was created
80 struct pidns_init_store
*next
;
84 /* lol - look at how they are allocated in the kernel */
85 #define PIDNS_HASH_SIZE 4096
86 #define HASH(x) ((x) % PIDNS_HASH_SIZE)
88 struct pidns_init_store
*pidns_hash_table
[PIDNS_HASH_SIZE
];
89 static pthread_mutex_t pidns_store_mutex
= PTHREAD_MUTEX_INITIALIZER
;
90 static void lock_mutex(pthread_mutex_t
*l
)
94 if ((ret
= pthread_mutex_lock(l
)) != 0) {
95 fprintf(stderr
, "pthread_mutex_lock returned:%d %s\n", ret
, strerror(ret
));
100 static void unlock_mutex(pthread_mutex_t
*l
)
104 if ((ret
= pthread_mutex_unlock(l
)) != 0) {
105 fprintf(stderr
, "pthread_mutex_unlock returned:%d %s\n", ret
, strerror(ret
));
110 static void store_lock(void)
112 lock_mutex(&pidns_store_mutex
);
115 static void store_unlock(void)
117 unlock_mutex(&pidns_store_mutex
);
120 /* Must be called under store_lock */
121 static bool initpid_still_valid(struct pidns_init_store
*e
, struct stat
*nsfdsb
)
126 snprintf(fnam
, 100, "/proc/%d", e
->initpid
);
127 if (stat(fnam
, &initsb
) < 0)
130 fprintf(stderr
, "comparing ctime %ld %ld for pid %d\n",
131 e
->ctime
, initsb
.st_ctime
, e
->initpid
);
133 if (e
->ctime
!= initsb
.st_ctime
)
138 /* Must be called under store_lock */
139 static void remove_initpid(struct pidns_init_store
*e
)
141 struct pidns_init_store
*tmp
;
145 fprintf(stderr
, "remove_initpid: removing entry for %d\n", e
->initpid
);
148 if (pidns_hash_table
[h
] == e
) {
149 pidns_hash_table
[h
] = e
->next
;
154 tmp
= pidns_hash_table
[h
];
156 if (tmp
->next
== e
) {
166 /* Must be called under store_lock */
167 static void prune_initpid_store(void)
169 static long int last_prune
= 0;
170 struct pidns_init_store
*e
, *prev
, *delme
;
171 long int now
, threshold
;
175 last_prune
= time(NULL
);
179 if (now
< last_prune
+ PURGE_SECS
)
182 fprintf(stderr
, "pruning\n");
185 threshold
= now
- 2 * PURGE_SECS
;
187 for (i
= 0; i
< PIDNS_HASH_SIZE
; i
++) {
188 for (prev
= NULL
, e
= pidns_hash_table
[i
]; e
; ) {
189 if (e
->lastcheck
< threshold
) {
191 fprintf(stderr
, "Removing cached entry for %d\n", e
->initpid
);
195 prev
->next
= e
->next
;
197 pidns_hash_table
[i
] = e
->next
;
208 /* Must be called under store_lock */
209 static void save_initpid(struct stat
*sb
, pid_t pid
)
211 struct pidns_init_store
*e
;
217 fprintf(stderr
, "save_initpid: adding entry for %d\n", pid
);
219 snprintf(fpath
, 100, "/proc/%d", pid
);
220 if (stat(fpath
, &procsb
) < 0)
223 e
= malloc(sizeof(*e
));
227 e
->ctime
= procsb
.st_ctime
;
229 e
->next
= pidns_hash_table
[h
];
230 e
->lastcheck
= time(NULL
);
231 pidns_hash_table
[h
] = e
;
235 * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
236 * entry for the inode number and creation time. Verify that the init pid
237 * is still valid. If not, remove it. Return the entry if valid, NULL
239 * Must be called under store_lock
241 static struct pidns_init_store
*lookup_verify_initpid(struct stat
*sb
)
243 int h
= HASH(sb
->st_ino
);
244 struct pidns_init_store
*e
= pidns_hash_table
[h
];
247 if (e
->ino
== sb
->st_ino
) {
248 if (initpid_still_valid(e
, sb
)) {
249 e
->lastcheck
= time(NULL
);
261 #define SEND_CREDS_OK 0
262 #define SEND_CREDS_NOTSK 1
263 #define SEND_CREDS_FAIL 2
264 static bool recv_creds(int sock
, struct ucred
*cred
, char *v
);
265 static int wait_for_pid(pid_t pid
);
266 static int send_creds(int sock
, struct ucred
*cred
, char v
, bool pingfirst
);
269 * fork a task which switches to @task's namespace and writes '1'.
270 * over a unix sock so we can read the task's reaper's pid in our
273 static void write_task_init_pid_exit(int sock
, pid_t target
)
281 ret
= snprintf(fnam
, sizeof(fnam
), "/proc/%d/ns/pid", (int)target
);
282 if (ret
< 0 || ret
>= sizeof(fnam
))
285 fd
= open(fnam
, O_RDONLY
);
287 perror("write_task_init_pid_exit open of ns/pid");
291 perror("write_task_init_pid_exit setns 1");
299 if (!wait_for_pid(pid
))
304 /* we are the child */
309 if (send_creds(sock
, &cred
, v
, true) != SEND_CREDS_OK
)
314 static pid_t
get_init_pid_for_task(pid_t task
)
322 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sock
) < 0) {
323 perror("socketpair");
332 write_task_init_pid_exit(sock
[0], task
);
336 if (!recv_creds(sock
[1], &cred
, &v
))
348 static pid_t
lookup_initpid_in_store(pid_t qpid
)
352 struct pidns_init_store
*e
;
355 snprintf(fnam
, 100, "/proc/%d/ns/pid", qpid
);
357 if (stat(fnam
, &sb
) < 0)
359 e
= lookup_verify_initpid(&sb
);
364 answer
= get_init_pid_for_task(qpid
);
366 save_initpid(&sb
, answer
);
369 /* we prune at end in case we are returning
370 * the value we were about to return */
371 prune_initpid_store();
376 static int wait_for_pid(pid_t pid
)
384 ret
= waitpid(pid
, &status
, 0);
392 if (!WIFEXITED(status
) || WEXITSTATUS(status
) != 0)
399 * append pid to *src.
400 * src: a pointer to a char* in which to append the pid.
401 * sz: the number of characters printed so far, minus trailing \0.
402 * asz: the allocated size so far
403 * pid: the pid to append
405 static void must_strcat_pid(char **src
, size_t *sz
, size_t *asz
, pid_t pid
)
409 int tmplen
= sprintf(tmp
, "%d\n", (int)pid
);
411 if (!*src
|| tmplen
+ *sz
+ 1 >= *asz
) {
414 tmp
= realloc(*src
, *asz
+ BUF_RESERVE_SIZE
);
417 *asz
+= BUF_RESERVE_SIZE
;
419 memcpy((*src
) +*sz
, tmp
, tmplen
);
425 * Given a open file * to /proc/pid/{u,g}id_map, and an id
426 * valid in the caller's namespace, return the id mapped into
428 * Returns the mapped id, or -1 on error.
431 convert_id_to_ns(FILE *idfile
, unsigned int in_id
)
433 unsigned int nsuid
, // base id for a range in the idfile's namespace
434 hostuid
, // base id for a range in the caller's namespace
435 count
; // number of ids in this range
439 fseek(idfile
, 0L, SEEK_SET
);
440 while (fgets(line
, 400, idfile
)) {
441 ret
= sscanf(line
, "%u %u %u\n", &nsuid
, &hostuid
, &count
);
444 if (hostuid
+ count
< hostuid
|| nsuid
+ count
< nsuid
) {
446 * uids wrapped around - unexpected as this is a procfile,
449 fprintf(stderr
, "pid wrapparound at entry %u %u %u in %s\n",
450 nsuid
, hostuid
, count
, line
);
453 if (hostuid
<= in_id
&& hostuid
+count
> in_id
) {
455 * now since hostuid <= in_id < hostuid+count, and
456 * hostuid+count and nsuid+count do not wrap around,
457 * we know that nsuid+(in_id-hostuid) which must be
458 * less than nsuid+(count) must not wrap around
460 return (in_id
- hostuid
) + nsuid
;
469 * for is_privileged_over,
470 * specify whether we require the calling uid to be root in his
473 #define NS_ROOT_REQD true
474 #define NS_ROOT_OPT false
478 static bool is_privileged_over(pid_t pid
, uid_t uid
, uid_t victim
, bool req_ns_root
)
485 if (victim
== -1 || uid
== -1)
489 * If the request is one not requiring root in the namespace,
490 * then having the same uid suffices. (i.e. uid 1000 has write
491 * access to files owned by uid 1000
493 if (!req_ns_root
&& uid
== victim
)
496 ret
= snprintf(fpath
, PROCLEN
, "/proc/%d/uid_map", pid
);
497 if (ret
< 0 || ret
>= PROCLEN
)
499 FILE *f
= fopen(fpath
, "r");
503 /* if caller's not root in his namespace, reject */
504 nsuid
= convert_id_to_ns(f
, uid
);
509 * If victim is not mapped into caller's ns, reject.
510 * XXX I'm not sure this check is needed given that fuse
511 * will be sending requests where the vfs has converted
513 nsuid
= convert_id_to_ns(f
, victim
);
524 static bool perms_include(int fmode
, mode_t req_mode
)
528 switch (req_mode
& O_ACCMODE
) {
536 r
= S_IROTH
| S_IWOTH
;
541 return ((fmode
& r
) == r
);
547 * querycg is /a/b/c/d/e
550 static char *get_next_cgroup_dir(const char *taskcg
, const char *querycg
)
554 if (strlen(taskcg
) <= strlen(querycg
)) {
555 fprintf(stderr
, "%s: I was fed bad input\n", __func__
);
559 if (strcmp(querycg
, "/") == 0)
560 start
= strdup(taskcg
+ 1);
562 start
= strdup(taskcg
+ strlen(querycg
) + 1);
565 end
= strchr(start
, '/');
571 static void stripnewline(char *x
)
573 size_t l
= strlen(x
);
574 if (l
&& x
[l
-1] == '\n')
578 static char *get_pid_cgroup(pid_t pid
, const char *contrl
)
586 const char *h
= find_mounted_controller(contrl
);
590 ret
= snprintf(fnam
, PROCLEN
, "/proc/%d/cgroup", pid
);
591 if (ret
< 0 || ret
>= PROCLEN
)
593 if (!(f
= fopen(fnam
, "r")))
596 while (getline(&line
, &len
, f
) != -1) {
600 c1
= strchr(line
, ':');
604 c2
= strchr(c1
, ':');
608 if (strcmp(c1
, h
) != 0)
625 * check whether a fuse context may access a cgroup dir or file
627 * If file is not null, it is a cgroup file to check under cg.
628 * If file is null, then we are checking perms on cg itself.
630 * For files we can check the mode of the list_keys result.
631 * For cgroups, we must make assumptions based on the files under the
632 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
635 static bool fc_may_access(struct fuse_context
*fc
, const char *contrl
, const char *cg
, const char *file
, mode_t mode
)
637 struct cgfs_files
*k
= NULL
;
640 k
= cgfs_get_key(contrl
, cg
, file
);
644 if (is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_OPT
)) {
645 if (perms_include(k
->mode
>> 6, mode
)) {
650 if (fc
->gid
== k
->gid
) {
651 if (perms_include(k
->mode
>> 3, mode
)) {
656 ret
= perms_include(k
->mode
, mode
);
663 #define INITSCOPE "/init.scope"
664 static void prune_init_slice(char *cg
)
667 size_t cg_len
= strlen(cg
), initscope_len
= strlen(INITSCOPE
);
669 if (cg_len
< initscope_len
)
672 point
= cg
+ cg_len
- initscope_len
;
673 if (strcmp(point
, INITSCOPE
) == 0) {
682 * If pid is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
683 * If pid is in /a, he may act on /a/b, but not on /b.
684 * if the answer is false and nextcg is not NULL, then *nextcg will point
685 * to a string containing the next cgroup directory under cg, which must be
686 * freed by the caller.
688 static bool caller_is_in_ancestor(pid_t pid
, const char *contrl
, const char *cg
, char **nextcg
)
691 char *c2
= get_pid_cgroup(pid
, contrl
);
696 prune_init_slice(c2
);
699 * callers pass in '/' for root cgroup, otherwise they pass
700 * in a cgroup without leading '/'
702 linecmp
= *cg
== '/' ? c2
: c2
+1;
703 if (strncmp(linecmp
, cg
, strlen(linecmp
)) != 0) {
705 *nextcg
= get_next_cgroup_dir(linecmp
, cg
);
717 * If pid is in /a/b/c, he may see that /a exists, but not /b or /a/c.
719 static bool caller_may_see_dir(pid_t pid
, const char *contrl
, const char *cg
)
723 size_t target_len
, task_len
;
725 if (strcmp(cg
, "/") == 0)
728 c2
= get_pid_cgroup(pid
, contrl
);
731 prune_init_slice(c2
);
734 target_len
= strlen(cg
);
735 task_len
= strlen(task_cg
);
737 /* Task is in the root cg, it can see everything. This case is
738 * not handled by the strcmps below, since they test for the
739 * last /, but that is the first / that we've chopped off
745 if (strcmp(cg
, task_cg
) == 0) {
749 if (target_len
< task_len
) {
750 /* looking up a parent dir */
751 if (strncmp(task_cg
, cg
, target_len
) == 0 && task_cg
[target_len
] == '/')
755 if (target_len
> task_len
) {
756 /* looking up a child dir */
757 if (strncmp(task_cg
, cg
, task_len
) == 0 && cg
[task_len
] == '/')
768 * given /cgroup/freezer/a/b, return "freezer".
769 * the returned char* should NOT be freed.
771 static char *pick_controller_from_path(struct fuse_context
*fc
, const char *path
)
776 if (strlen(path
) < 9)
778 if (*(path
+7) != '/')
784 slash
= strstr(contr
, "/");
789 for (i
= 0; i
< num_hierarchies
; i
++) {
790 if (hierarchies
[i
] && strcmp(hierarchies
[i
], contr
) == 0)
791 return hierarchies
[i
];
797 * Find the start of cgroup in /cgroup/controller/the/cgroup/path
798 * Note that the returned value may include files (keynames) etc
800 static const char *find_cgroup_in_path(const char *path
)
804 if (strlen(path
) < 9)
806 p1
= strstr(path
+8, "/");
813 * split the last path element from the path in @cg.
814 * @dir is newly allocated and should be freed, @last not
816 static void get_cgdir_and_path(const char *cg
, char **dir
, char **last
)
823 *last
= strrchr(cg
, '/');
828 p
= strrchr(*dir
, '/');
833 * FUSE ops for /cgroup
836 static int cg_getattr(const char *path
, struct stat
*sb
)
839 struct fuse_context
*fc
= fuse_get_context();
841 char *last
= NULL
, *path1
, *path2
;
842 struct cgfs_files
*k
= NULL
;
844 const char *controller
= NULL
;
851 memset(sb
, 0, sizeof(struct stat
));
853 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
856 sb
->st_uid
= sb
->st_gid
= 0;
857 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
860 if (strcmp(path
, "/cgroup") == 0) {
861 sb
->st_mode
= S_IFDIR
| 00755;
866 controller
= pick_controller_from_path(fc
, path
);
869 cgroup
= find_cgroup_in_path(path
);
871 /* this is just /cgroup/controller, return it as a dir */
872 sb
->st_mode
= S_IFDIR
| 00755;
877 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
887 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
890 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
891 * Then check that caller's cgroup is under path if last is a child
892 * cgroup, or cgdir if last is a file */
894 if (is_child_cgroup(controller
, path1
, path2
)) {
895 if (!caller_may_see_dir(initpid
, controller
, cgroup
)) {
899 if (!caller_is_in_ancestor(initpid
, controller
, cgroup
, NULL
)) {
900 /* this is just /cgroup/controller, return it as a dir */
901 sb
->st_mode
= S_IFDIR
| 00555;
906 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
)) {
911 // get uid, gid, from '/tasks' file and make up a mode
912 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
913 sb
->st_mode
= S_IFDIR
| 00755;
914 k
= cgfs_get_key(controller
, cgroup
, NULL
);
916 sb
->st_uid
= sb
->st_gid
= 0;
927 if ((k
= cgfs_get_key(controller
, path1
, path2
)) != NULL
) {
928 sb
->st_mode
= S_IFREG
| k
->mode
;
934 if (!caller_is_in_ancestor(initpid
, controller
, path1
, NULL
)) {
938 if (!fc_may_access(fc
, controller
, path1
, path2
, O_RDONLY
)) {
951 static int cg_opendir(const char *path
, struct fuse_file_info
*fi
)
953 struct fuse_context
*fc
= fuse_get_context();
955 struct file_info
*dir_info
;
956 char *controller
= NULL
;
961 if (strcmp(path
, "/cgroup") == 0) {
965 // return list of keys for the controller, and list of child cgroups
966 controller
= pick_controller_from_path(fc
, path
);
970 cgroup
= find_cgroup_in_path(path
);
972 /* this is just /cgroup/controller, return its contents */
977 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
981 if (!caller_may_see_dir(initpid
, controller
, cgroup
))
983 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
))
987 /* we'll free this at cg_releasedir */
988 dir_info
= malloc(sizeof(*dir_info
));
991 dir_info
->controller
= must_copy_string(controller
);
992 dir_info
->cgroup
= must_copy_string(cgroup
);
993 dir_info
->type
= LXC_TYPE_CGDIR
;
994 dir_info
->buf
= NULL
;
995 dir_info
->file
= NULL
;
996 dir_info
->buflen
= 0;
998 fi
->fh
= (unsigned long)dir_info
;
1002 static int cg_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
1003 struct fuse_file_info
*fi
)
1005 struct file_info
*d
= (struct file_info
*)fi
->fh
;
1006 struct cgfs_files
**list
= NULL
;
1008 char *nextcg
= NULL
;
1009 struct fuse_context
*fc
= fuse_get_context();
1010 char **clist
= NULL
;
1012 if (d
->type
!= LXC_TYPE_CGDIR
) {
1013 fprintf(stderr
, "Internal error: file cache info used in readdir\n");
1016 if (!d
->cgroup
&& !d
->controller
) {
1017 // ls /var/lib/lxcfs/cgroup - just show list of controllers
1020 for (i
= 0; i
< num_hierarchies
; i
++) {
1021 if (hierarchies
[i
] && filler(buf
, hierarchies
[i
], NULL
, 0) != 0) {
1028 if (!cgfs_list_keys(d
->controller
, d
->cgroup
, &list
)) {
1029 // not a valid cgroup
1034 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
1037 if (!caller_is_in_ancestor(initpid
, d
->controller
, d
->cgroup
, &nextcg
)) {
1040 ret
= filler(buf
, nextcg
, NULL
, 0);
1051 for (i
= 0; list
[i
]; i
++) {
1052 if (filler(buf
, list
[i
]->name
, NULL
, 0) != 0) {
1058 // now get the list of child cgroups
1060 if (!cgfs_list_children(d
->controller
, d
->cgroup
, &clist
)) {
1064 for (i
= 0; clist
[i
]; i
++) {
1065 if (filler(buf
, clist
[i
], NULL
, 0) != 0) {
1075 for (i
= 0; clist
[i
]; i
++)
1082 static void do_release_file_info(struct file_info
*f
)
1086 free(f
->controller
);
1093 static int cg_releasedir(const char *path
, struct fuse_file_info
*fi
)
1095 struct file_info
*d
= (struct file_info
*)fi
->fh
;
1097 do_release_file_info(d
);
1101 static int cg_open(const char *path
, struct fuse_file_info
*fi
)
1104 char *last
= NULL
, *path1
, *path2
, * cgdir
= NULL
, *controller
;
1105 struct cgfs_files
*k
= NULL
;
1106 struct file_info
*file_info
;
1107 struct fuse_context
*fc
= fuse_get_context();
1113 controller
= pick_controller_from_path(fc
, path
);
1116 cgroup
= find_cgroup_in_path(path
);
1120 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
1129 k
= cgfs_get_key(controller
, path1
, path2
);
1136 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
1139 if (!caller_may_see_dir(initpid
, controller
, path1
)) {
1143 if (!fc_may_access(fc
, controller
, path1
, path2
, fi
->flags
)) {
1144 // should never get here
1149 /* we'll free this at cg_release */
1150 file_info
= malloc(sizeof(*file_info
));
1155 file_info
->controller
= must_copy_string(controller
);
1156 file_info
->cgroup
= must_copy_string(path1
);
1157 file_info
->file
= must_copy_string(path2
);
1158 file_info
->type
= LXC_TYPE_CGFILE
;
1159 file_info
->buf
= NULL
;
1160 file_info
->buflen
= 0;
1162 fi
->fh
= (unsigned long)file_info
;
1170 static int cg_release(const char *path
, struct fuse_file_info
*fi
)
1172 struct file_info
*f
= (struct file_info
*)fi
->fh
;
1174 do_release_file_info(f
);
1178 #define POLLIN_SET ( EPOLLIN | EPOLLHUP | EPOLLRDHUP )
1180 static bool wait_for_sock(int sock
, int timeout
)
1182 struct epoll_event ev
;
1183 int epfd
, ret
, now
, starttime
, deltatime
, saved_errno
;
1185 if ((starttime
= time(NULL
)) < 0)
1188 if ((epfd
= epoll_create(1)) < 0) {
1189 fprintf(stderr
, "Failed to create epoll socket: %m\n");
1193 ev
.events
= POLLIN_SET
;
1195 if (epoll_ctl(epfd
, EPOLL_CTL_ADD
, sock
, &ev
) < 0) {
1196 fprintf(stderr
, "Failed adding socket to epoll: %m\n");
1202 if ((now
= time(NULL
)) < 0) {
1207 deltatime
= (starttime
+ timeout
) - now
;
1208 if (deltatime
< 0) { // timeout
1213 ret
= epoll_wait(epfd
, &ev
, 1, 1000*deltatime
+ 1);
1214 if (ret
< 0 && errno
== EINTR
)
1216 saved_errno
= errno
;
1220 errno
= saved_errno
;
1226 static int msgrecv(int sockfd
, void *buf
, size_t len
)
1228 if (!wait_for_sock(sockfd
, 2))
1230 return recv(sockfd
, buf
, len
, MSG_DONTWAIT
);
1233 static int send_creds(int sock
, struct ucred
*cred
, char v
, bool pingfirst
)
1235 struct msghdr msg
= { 0 };
1237 struct cmsghdr
*cmsg
;
1238 char cmsgbuf
[CMSG_SPACE(sizeof(*cred
))];
1243 if (msgrecv(sock
, buf
, 1) != 1) {
1244 fprintf(stderr
, "%s: Error getting reply from server over socketpair\n",
1246 return SEND_CREDS_FAIL
;
1250 msg
.msg_control
= cmsgbuf
;
1251 msg
.msg_controllen
= sizeof(cmsgbuf
);
1253 cmsg
= CMSG_FIRSTHDR(&msg
);
1254 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
1255 cmsg
->cmsg_level
= SOL_SOCKET
;
1256 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
1257 memcpy(CMSG_DATA(cmsg
), cred
, sizeof(*cred
));
1259 msg
.msg_name
= NULL
;
1260 msg
.msg_namelen
= 0;
1264 iov
.iov_len
= sizeof(buf
);
1268 if (sendmsg(sock
, &msg
, 0) < 0) {
1269 fprintf(stderr
, "%s: failed at sendmsg: %s\n", __func__
,
1272 return SEND_CREDS_NOTSK
;
1273 return SEND_CREDS_FAIL
;
1276 return SEND_CREDS_OK
;
1279 static bool recv_creds(int sock
, struct ucred
*cred
, char *v
)
1281 struct msghdr msg
= { 0 };
1283 struct cmsghdr
*cmsg
;
1284 char cmsgbuf
[CMSG_SPACE(sizeof(*cred
))];
1295 if (setsockopt(sock
, SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
1296 fprintf(stderr
, "Failed to set passcred: %s\n", strerror(errno
));
1300 if (write(sock
, buf
, 1) != 1) {
1301 fprintf(stderr
, "Failed to start write on scm fd: %s\n", strerror(errno
));
1305 msg
.msg_name
= NULL
;
1306 msg
.msg_namelen
= 0;
1307 msg
.msg_control
= cmsgbuf
;
1308 msg
.msg_controllen
= sizeof(cmsgbuf
);
1311 iov
.iov_len
= sizeof(buf
);
1315 if (!wait_for_sock(sock
, 2)) {
1316 fprintf(stderr
, "Timed out waiting for scm_cred: %s\n",
1320 ret
= recvmsg(sock
, &msg
, MSG_DONTWAIT
);
1322 fprintf(stderr
, "Failed to receive scm_cred: %s\n",
1327 cmsg
= CMSG_FIRSTHDR(&msg
);
1329 if (cmsg
&& cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)) &&
1330 cmsg
->cmsg_level
== SOL_SOCKET
&&
1331 cmsg
->cmsg_type
== SCM_CREDENTIALS
) {
1332 memcpy(cred
, CMSG_DATA(cmsg
), sizeof(*cred
));
1341 * pid_to_ns - reads pids from a ucred over a socket, then writes the
1342 * int value back over the socket. This shifts the pid from the
1343 * sender's pidns into tpid's pidns.
1345 static void pid_to_ns(int sock
, pid_t tpid
)
1350 while (recv_creds(sock
, &cred
, &v
)) {
1353 if (write(sock
, &cred
.pid
, sizeof(pid_t
)) != sizeof(pid_t
))
1360 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
1361 * in your old pidns. Only children which you fork will be in the target
1362 * pidns. So the pid_to_ns_wrapper does the setns, then forks a child to
1363 * actually convert pids
1365 static void pid_to_ns_wrapper(int sock
, pid_t tpid
)
1367 int newnsfd
= -1, ret
, cpipe
[2];
1372 ret
= snprintf(fnam
, sizeof(fnam
), "/proc/%d/ns/pid", tpid
);
1373 if (ret
< 0 || ret
>= sizeof(fnam
))
1375 newnsfd
= open(fnam
, O_RDONLY
);
1378 if (setns(newnsfd
, 0) < 0)
1382 if (pipe(cpipe
) < 0)
1392 if (write(cpipe
[1], &b
, sizeof(char)) < 0) {
1393 fprintf(stderr
, "%s (child): erorr on write: %s\n",
1394 __func__
, strerror(errno
));
1397 pid_to_ns(sock
, tpid
);
1398 _exit(1); // not reached
1400 // give the child 1 second to be done forking and
1402 if (!wait_for_sock(cpipe
[0], 1))
1404 ret
= read(cpipe
[0], &v
, 1);
1405 if (ret
!= sizeof(char) || v
!= '1')
1408 if (!wait_for_pid(cpid
))
1414 * To read cgroup files with a particular pid, we will setns into the child
1415 * pidns, open a pipe, fork a child - which will be the first to really be in
1416 * the child ns - which does the cgfs_get_value and writes the data to the pipe.
1418 static bool do_read_pids(pid_t tpid
, const char *contrl
, const char *cg
, const char *file
, char **d
)
1420 int sock
[2] = {-1, -1};
1421 char *tmpdata
= NULL
;
1423 pid_t qpid
, cpid
= -1;
1424 bool answer
= false;
1427 size_t sz
= 0, asz
= 0;
1429 if (!cgfs_get_value(contrl
, cg
, file
, &tmpdata
))
1433 * Now we read the pids from returned data one by one, pass
1434 * them into a child in the target namespace, read back the
1435 * translated pids, and put them into our to-return data
1438 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sock
) < 0) {
1439 perror("socketpair");
1448 if (!cpid
) // child - exits when done
1449 pid_to_ns_wrapper(sock
[1], tpid
);
1451 char *ptr
= tmpdata
;
1454 while (sscanf(ptr
, "%d\n", &qpid
) == 1) {
1456 ret
= send_creds(sock
[0], &cred
, v
, true);
1458 if (ret
== SEND_CREDS_NOTSK
)
1460 if (ret
== SEND_CREDS_FAIL
)
1463 // read converted results
1464 if (!wait_for_sock(sock
[0], 2)) {
1465 fprintf(stderr
, "%s: timed out waiting for pid from child: %s\n",
1466 __func__
, strerror(errno
));
1469 if (read(sock
[0], &qpid
, sizeof(qpid
)) != sizeof(qpid
)) {
1470 fprintf(stderr
, "%s: error reading pid from child: %s\n",
1471 __func__
, strerror(errno
));
1474 must_strcat_pid(d
, &sz
, &asz
, qpid
);
1476 ptr
= strchr(ptr
, '\n');
1482 cred
.pid
= getpid();
1484 if (send_creds(sock
[0], &cred
, v
, true) != SEND_CREDS_OK
) {
1485 // failed to ask child to exit
1486 fprintf(stderr
, "%s: failed to ask child to exit: %s\n",
1487 __func__
, strerror(errno
));
1497 if (sock
[0] != -1) {
1504 static int cg_read(const char *path
, char *buf
, size_t size
, off_t offset
,
1505 struct fuse_file_info
*fi
)
1507 struct fuse_context
*fc
= fuse_get_context();
1508 struct file_info
*f
= (struct file_info
*)fi
->fh
;
1509 struct cgfs_files
*k
= NULL
;
1514 if (f
->type
!= LXC_TYPE_CGFILE
) {
1515 fprintf(stderr
, "Internal error: directory cache info used in cg_read\n");
1528 if ((k
= cgfs_get_key(f
->controller
, f
->cgroup
, f
->file
)) == NULL
) {
1534 if (!fc_may_access(fc
, f
->controller
, f
->cgroup
, f
->file
, O_RDONLY
)) { // should never get here
1539 if (strcmp(f
->file
, "tasks") == 0 ||
1540 strcmp(f
->file
, "/tasks") == 0 ||
1541 strcmp(f
->file
, "/cgroup.procs") == 0 ||
1542 strcmp(f
->file
, "cgroup.procs") == 0)
1543 // special case - we have to translate the pids
1544 r
= do_read_pids(fc
->pid
, f
->controller
, f
->cgroup
, f
->file
, &data
);
1546 r
= cgfs_get_value(f
->controller
, f
->cgroup
, f
->file
, &data
);
1560 memcpy(buf
, data
, s
);
1561 if (s
> 0 && s
< size
&& data
[s
-1] != '\n')
1571 static void pid_from_ns(int sock
, pid_t tpid
)
1581 if (!wait_for_sock(sock
, 2)) {
1582 fprintf(stderr
, "%s: timeout reading from parent\n", __func__
);
1585 if ((ret
= read(sock
, &vpid
, sizeof(pid_t
))) != sizeof(pid_t
)) {
1586 fprintf(stderr
, "%s: bad read from parent: %s\n",
1587 __func__
, strerror(errno
));
1590 if (vpid
== -1) // done
1594 if (send_creds(sock
, &cred
, v
, true) != SEND_CREDS_OK
) {
1596 cred
.pid
= getpid();
1597 if (send_creds(sock
, &cred
, v
, false) != SEND_CREDS_OK
)
1604 static void pid_from_ns_wrapper(int sock
, pid_t tpid
)
1606 int newnsfd
= -1, ret
, cpipe
[2];
1611 ret
= snprintf(fnam
, sizeof(fnam
), "/proc/%d/ns/pid", tpid
);
1612 if (ret
< 0 || ret
>= sizeof(fnam
))
1614 newnsfd
= open(fnam
, O_RDONLY
);
1617 if (setns(newnsfd
, 0) < 0)
1621 if (pipe(cpipe
) < 0)
1633 if (write(cpipe
[1], &b
, sizeof(char)) < 0) {
1634 fprintf(stderr
, "%s (child): erorr on write: %s\n",
1635 __func__
, strerror(errno
));
1638 pid_from_ns(sock
, tpid
);
1641 // give the child 1 second to be done forking and
1643 if (!wait_for_sock(cpipe
[0], 1))
1645 ret
= read(cpipe
[0], &v
, 1);
1646 if (ret
!= sizeof(char) || v
!= '1') {
1650 if (!wait_for_pid(cpid
))
1655 kill(cpid
, SIGKILL
);
1661 * Given host @uid, return the uid to which it maps in
1662 * @pid's user namespace, or -1 if none.
1664 bool hostuid_to_ns(uid_t uid
, pid_t pid
, uid_t
*answer
)
1669 sprintf(line
, "/proc/%d/uid_map", pid
);
1670 if ((f
= fopen(line
, "r")) == NULL
) {
1674 *answer
= convert_id_to_ns(f
, uid
);
1683 * get_pid_creds: get the real uid and gid of @pid from
1685 * (XXX should we use euid here?)
1687 void get_pid_creds(pid_t pid
, uid_t
*uid
, gid_t
*gid
)
1696 sprintf(line
, "/proc/%d/status", pid
);
1697 if ((f
= fopen(line
, "r")) == NULL
) {
1698 fprintf(stderr
, "Error opening %s: %s\n", line
, strerror(errno
));
1701 while (fgets(line
, 400, f
)) {
1702 if (strncmp(line
, "Uid:", 4) == 0) {
1703 if (sscanf(line
+4, "%u", &u
) != 1) {
1704 fprintf(stderr
, "bad uid line for pid %u\n", pid
);
1709 } else if (strncmp(line
, "Gid:", 4) == 0) {
1710 if (sscanf(line
+4, "%u", &g
) != 1) {
1711 fprintf(stderr
, "bad gid line for pid %u\n", pid
);
1722 * May the requestor @r move victim @v to a new cgroup?
1723 * This is allowed if
1724 * . they are the same task
1725 * . they are owned by the same uid
1726 * . @r is root on the host, or
1727 * . @v's uid is mapped into @r's where @r is root.
1729 bool may_move_pid(pid_t r
, uid_t r_uid
, pid_t v
)
1731 uid_t v_uid
, tmpuid
;
1738 get_pid_creds(v
, &v_uid
, &v_gid
);
1741 if (hostuid_to_ns(r_uid
, r
, &tmpuid
) && tmpuid
== 0
1742 && hostuid_to_ns(v_uid
, r
, &tmpuid
))
1747 static bool do_write_pids(pid_t tpid
, uid_t tuid
, const char *contrl
, const char *cg
,
1748 const char *file
, const char *buf
)
1750 int sock
[2] = {-1, -1};
1751 pid_t qpid
, cpid
= -1;
1752 FILE *pids_file
= NULL
;
1753 bool answer
= false, fail
= false;
1755 pids_file
= open_pids_file(contrl
, cg
);
1760 * write the pids to a socket, have helper in writer's pidns
1761 * call movepid for us
1763 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sock
) < 0) {
1764 perror("socketpair");
1772 if (!cpid
) { // child
1774 pid_from_ns_wrapper(sock
[1], tpid
);
1777 const char *ptr
= buf
;
1778 while (sscanf(ptr
, "%d", &qpid
) == 1) {
1782 if (write(sock
[0], &qpid
, sizeof(qpid
)) != sizeof(qpid
)) {
1783 fprintf(stderr
, "%s: error writing pid to child: %s\n",
1784 __func__
, strerror(errno
));
1788 if (recv_creds(sock
[0], &cred
, &v
)) {
1790 if (!may_move_pid(tpid
, tuid
, cred
.pid
)) {
1794 if (fprintf(pids_file
, "%d", (int) cred
.pid
) < 0)
1799 ptr
= strchr(ptr
, '\n');
1805 /* All good, write the value */
1807 if (write(sock
[0], &qpid
,sizeof(qpid
)) != sizeof(qpid
))
1808 fprintf(stderr
, "Warning: failed to ask child to exit\n");
1816 if (sock
[0] != -1) {
1821 if (fclose(pids_file
) != 0)
1827 int cg_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
1828 struct fuse_file_info
*fi
)
1830 struct fuse_context
*fc
= fuse_get_context();
1831 char *localbuf
= NULL
;
1832 struct cgfs_files
*k
= NULL
;
1833 struct file_info
*f
= (struct file_info
*)fi
->fh
;
1836 if (f
->type
!= LXC_TYPE_CGFILE
) {
1837 fprintf(stderr
, "Internal error: directory cache info used in cg_write\n");
1847 localbuf
= alloca(size
+1);
1848 localbuf
[size
] = '\0';
1849 memcpy(localbuf
, buf
, size
);
1851 if ((k
= cgfs_get_key(f
->controller
, f
->cgroup
, f
->file
)) == NULL
) {
1856 if (!fc_may_access(fc
, f
->controller
, f
->cgroup
, f
->file
, O_WRONLY
)) {
1861 if (strcmp(f
->file
, "tasks") == 0 ||
1862 strcmp(f
->file
, "/tasks") == 0 ||
1863 strcmp(f
->file
, "/cgroup.procs") == 0 ||
1864 strcmp(f
->file
, "cgroup.procs") == 0)
1865 // special case - we have to translate the pids
1866 r
= do_write_pids(fc
->pid
, fc
->uid
, f
->controller
, f
->cgroup
, f
->file
, localbuf
);
1868 r
= cgfs_set_value(f
->controller
, f
->cgroup
, f
->file
, localbuf
);
1878 int cg_chown(const char *path
, uid_t uid
, gid_t gid
)
1880 struct fuse_context
*fc
= fuse_get_context();
1881 char *cgdir
= NULL
, *last
= NULL
, *path1
, *path2
, *controller
;
1882 struct cgfs_files
*k
= NULL
;
1889 if (strcmp(path
, "/cgroup") == 0)
1892 controller
= pick_controller_from_path(fc
, path
);
1895 cgroup
= find_cgroup_in_path(path
);
1897 /* this is just /cgroup/controller */
1900 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
1910 if (is_child_cgroup(controller
, path1
, path2
)) {
1911 // get uid, gid, from '/tasks' file and make up a mode
1912 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1913 k
= cgfs_get_key(controller
, cgroup
, "tasks");
1916 k
= cgfs_get_key(controller
, path1
, path2
);
1924 * This being a fuse request, the uid and gid must be valid
1925 * in the caller's namespace. So we can just check to make
1926 * sure that the caller is root in his uid, and privileged
1927 * over the file's current owner.
1929 if (!is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_REQD
)) {
1934 ret
= cgfs_chown_file(controller
, cgroup
, uid
, gid
);
1943 int cg_chmod(const char *path
, mode_t mode
)
1945 struct fuse_context
*fc
= fuse_get_context();
1946 char * cgdir
= NULL
, *last
= NULL
, *path1
, *path2
, *controller
;
1947 struct cgfs_files
*k
= NULL
;
1954 if (strcmp(path
, "/cgroup") == 0)
1957 controller
= pick_controller_from_path(fc
, path
);
1960 cgroup
= find_cgroup_in_path(path
);
1962 /* this is just /cgroup/controller */
1965 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
1975 if (is_child_cgroup(controller
, path1
, path2
)) {
1976 // get uid, gid, from '/tasks' file and make up a mode
1977 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1978 k
= cgfs_get_key(controller
, cgroup
, "tasks");
1981 k
= cgfs_get_key(controller
, path1
, path2
);
1989 * This being a fuse request, the uid and gid must be valid
1990 * in the caller's namespace. So we can just check to make
1991 * sure that the caller is root in his uid, and privileged
1992 * over the file's current owner.
1994 if (!is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_OPT
)) {
1999 if (!cgfs_chmod_file(controller
, cgroup
, mode
)) {
2011 int cg_mkdir(const char *path
, mode_t mode
)
2013 struct fuse_context
*fc
= fuse_get_context();
2014 char *last
= NULL
, *path1
, *cgdir
= NULL
, *controller
, *next
= NULL
;
2022 controller
= pick_controller_from_path(fc
, path
);
2026 cgroup
= find_cgroup_in_path(path
);
2030 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
2036 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2039 if (!caller_is_in_ancestor(initpid
, controller
, path1
, &next
)) {
2042 else if (last
&& strcmp(next
, last
) == 0)
2049 if (!fc_may_access(fc
, controller
, path1
, NULL
, O_RDWR
)) {
2053 if (!caller_is_in_ancestor(initpid
, controller
, path1
, NULL
)) {
2058 ret
= cgfs_create(controller
, cgroup
, fc
->uid
, fc
->gid
);
2066 static int cg_rmdir(const char *path
)
2068 struct fuse_context
*fc
= fuse_get_context();
2069 char *last
= NULL
, *cgdir
= NULL
, *controller
, *next
= NULL
;
2076 controller
= pick_controller_from_path(fc
, path
);
2080 cgroup
= find_cgroup_in_path(path
);
2084 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
2090 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2093 if (!caller_is_in_ancestor(initpid
, controller
, cgroup
, &next
)) {
2094 if (!last
|| strcmp(next
, last
) == 0)
2101 if (!fc_may_access(fc
, controller
, cgdir
, NULL
, O_WRONLY
)) {
2105 if (!caller_is_in_ancestor(initpid
, controller
, cgroup
, NULL
)) {
2110 if (!cgfs_remove(controller
, cgroup
)) {
/*
 * Report whether @line begins with the prefix @pref.
 * An empty prefix matches every line.
 */
static bool startswith(const char *line, const char *pref)
{
	size_t preflen = strlen(pref);

	return strncmp(line, pref, preflen) == 0;
}
/*
 * Scan the newline-separated text in @memstat for the first line that
 * starts with "total_cache", parse the unsigned number that follows it,
 * divide it by 1024 and store the result in @v.
 *
 * @v is set to 0 when no such line exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	static const char key[] = "total_cache";
	const size_t keylen = sizeof(key) - 1;
	char *cur = memstat;

	*v = 0;
	while (*cur != '\0') {
		char *nl;

		if (strncmp(cur, key, keylen) == 0) {
			sscanf(cur + keylen, "%lu", v);
			*v /= 1024;
			return;
		}

		/* advance to the start of the next line, if there is one */
		nl = strchr(cur, '\n');
		if (nl == NULL)
			return;
		cur = nl + 1;
	}
}
/*
 * Look up one counter in a blkio-style statistics buffer.
 *
 * @str holds newline-separated lines; the line wanted is the first one
 * beginning with "<major>:<minor> <iotype>".  The unsigned number that
 * follows that prefix is stored in @v.  @v is set to 0 when no line
 * matches.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32];
	size_t keylen;
	char *cur;

	memset(key, 0, sizeof(key));
	snprintf(key, sizeof(key), "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;
	for (cur = str; *cur != '\0'; ) {
		char *nl;

		if (strncmp(cur, key, keylen) == 0) {
			sscanf(cur + keylen, "%lu", v);
			return;
		}

		nl = strchr(cur, '\n');
		if (nl == NULL)
			return;
		cur = nl + 1;
	}
}
2171 static int read_file(const char *path
, char *buf
, size_t size
,
2172 struct file_info
*d
)
2174 size_t linelen
= 0, total_len
= 0, rv
= 0;
2176 char *cache
= d
->buf
;
2177 size_t cache_size
= d
->buflen
;
2178 FILE *f
= fopen(path
, "r");
2182 while (getline(&line
, &linelen
, f
) != -1) {
2183 size_t l
= snprintf(cache
, cache_size
, "%s", line
);
2185 perror("Error writing to cache");
2189 if (l
>= cache_size
) {
2190 fprintf(stderr
, "Internal error: truncated write to cache\n");
2194 if (l
< cache_size
) {
2199 cache
+= cache_size
;
2200 total_len
+= cache_size
;
2206 d
->size
= total_len
;
2207 if (total_len
> size
) total_len
= size
;
2209 /* read from off 0 */
2210 memcpy(buf
, d
->buf
, total_len
);
2219 * FUSE ops for /proc
/*
 * Fetch memory.limit_in_bytes of @cgroup from the "memory" controller.
 *
 * Returns the parsed value, or (unsigned long)-1 when the value could
 * not be read.
 */
static unsigned long get_memlimit(const char *cgroup)
{
	char *raw = NULL;
	unsigned long limit = (unsigned long)-1;

	if (cgfs_get_value("memory", cgroup, "memory.limit_in_bytes", &raw))
		limit = strtoul(raw, NULL, 10);

	free(raw);
	return limit;
}
/*
 * Return the smallest memory limit that applies to @cgroup: its own
 * limit or that of any ancestor up to the root, whichever is lowest.
 * Ancestors whose limit cannot be read (get_memlimit() returns
 * (unsigned long)-1) are ignored.
 *
 * The walk uses dirname() on a stack copy of @cgroup, so the caller's
 * string is never modified.
 */
static unsigned long get_min_memlimit(const char *cgroup)
{
	char *copy = strdupa(cgroup);
	unsigned long memlimit, retlimit;

	retlimit = get_memlimit(copy);

	/*
	 * dirname() converges on "/" for absolute paths but on "." for
	 * anything else (including the empty string); stop on either so a
	 * path without a leading slash cannot spin forever.
	 */
	while (strcmp(copy, "/") != 0 && strcmp(copy, ".") != 0) {
		copy = dirname(copy);
		memlimit = get_memlimit(copy);
		if (memlimit != (unsigned long)-1 && memlimit < retlimit)
			retlimit = memlimit;
	}

	return retlimit;
}
2252 static int proc_meminfo_read(char *buf
, size_t size
, off_t offset
,
2253 struct fuse_file_info
*fi
)
2255 struct fuse_context
*fc
= fuse_get_context();
2256 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2258 char *memusage_str
= NULL
, *memstat_str
= NULL
,
2259 *memswlimit_str
= NULL
, *memswusage_str
= NULL
,
2260 *memswlimit_default_str
= NULL
, *memswusage_default_str
= NULL
;
2261 unsigned long memlimit
= 0, memusage
= 0, memswlimit
= 0, memswusage
= 0,
2262 cached
= 0, hosttotal
= 0;
2264 size_t linelen
= 0, total_len
= 0, rv
= 0;
2265 char *cache
= d
->buf
;
2266 size_t cache_size
= d
->buflen
;
2270 if (offset
> d
->size
)
2274 int left
= d
->size
- offset
;
2275 total_len
= left
> size
? size
: left
;
2276 memcpy(buf
, cache
+ offset
, total_len
);
2280 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2283 cg
= get_pid_cgroup(initpid
, "memory");
2285 return read_file("/proc/meminfo", buf
, size
, d
);
2287 memlimit
= get_min_memlimit(cg
);
2288 if (!cgfs_get_value("memory", cg
, "memory.usage_in_bytes", &memusage_str
))
2290 if (!cgfs_get_value("memory", cg
, "memory.stat", &memstat_str
))
2293 // Following values are allowed to fail, because swapaccount might be turned
2294 // off for current kernel
2295 if(cgfs_get_value("memory", cg
, "memory.memsw.limit_in_bytes", &memswlimit_str
) &&
2296 cgfs_get_value("memory", cg
, "memory.memsw.usage_in_bytes", &memswusage_str
))
2298 /* If swapaccounting is turned on, then default value is assumed to be that of cgroup / */
2299 if (!cgfs_get_value("memory", "/", "memory.memsw.limit_in_bytes", &memswlimit_default_str
))
2301 if (!cgfs_get_value("memory", "/", "memory.memsw.usage_in_bytes", &memswusage_default_str
))
2304 memswlimit
= strtoul(memswlimit_str
, NULL
, 10);
2305 memswusage
= strtoul(memswusage_str
, NULL
, 10);
2307 if (!strcmp(memswlimit_str
, memswlimit_default_str
))
2309 if (!strcmp(memswusage_str
, memswusage_default_str
))
2312 memswlimit
= memswlimit
/ 1024;
2313 memswusage
= memswusage
/ 1024;
2316 memusage
= strtoul(memusage_str
, NULL
, 10);
2320 get_mem_cached(memstat_str
, &cached
);
2322 f
= fopen("/proc/meminfo", "r");
2326 while (getline(&line
, &linelen
, f
) != -1) {
2328 char *printme
, lbuf
[100];
2330 memset(lbuf
, 0, 100);
2331 if (startswith(line
, "MemTotal:")) {
2332 sscanf(line
+14, "%lu", &hosttotal
);
2333 if (hosttotal
< memlimit
)
2334 memlimit
= hosttotal
;
2335 snprintf(lbuf
, 100, "MemTotal: %8lu kB\n", memlimit
);
2337 } else if (startswith(line
, "MemFree:")) {
2338 snprintf(lbuf
, 100, "MemFree: %8lu kB\n", memlimit
- memusage
);
2340 } else if (startswith(line
, "MemAvailable:")) {
2341 snprintf(lbuf
, 100, "MemAvailable: %8lu kB\n", memlimit
- memusage
);
2343 } else if (startswith(line
, "SwapTotal:") && memswlimit
> 0) {
2344 snprintf(lbuf
, 100, "SwapTotal: %8lu kB\n", memswlimit
- memlimit
);
2346 } else if (startswith(line
, "SwapFree:") && memswlimit
> 0 && memswusage
> 0) {
2347 snprintf(lbuf
, 100, "SwapFree: %8lu kB\n",
2348 (memswlimit
- memlimit
) - (memswusage
- memusage
));
2350 } else if (startswith(line
, "Buffers:")) {
2351 snprintf(lbuf
, 100, "Buffers: %8lu kB\n", 0UL);
2353 } else if (startswith(line
, "Cached:")) {
2354 snprintf(lbuf
, 100, "Cached: %8lu kB\n", cached
);
2356 } else if (startswith(line
, "SwapCached:")) {
2357 snprintf(lbuf
, 100, "SwapCached: %8lu kB\n", 0UL);
2362 l
= snprintf(cache
, cache_size
, "%s", printme
);
2364 perror("Error writing to cache");
2369 if (l
>= cache_size
) {
2370 fprintf(stderr
, "Internal error: truncated write to cache\n");
2381 d
->size
= total_len
;
2382 if (total_len
> size
) total_len
= size
;
2383 memcpy(buf
, d
->buf
, total_len
);
2392 free(memswlimit_str
);
2393 free(memswusage_str
);
2395 free(memswlimit_default_str
);
2396 free(memswusage_default_str
);
/*
 * Read cpuset.cpus for the cgroup @cg.
 *
 * Returns a newly allocated string which the caller must free, or NULL
 * when the value could not be read.
 */
static char *get_cpuset(const char *cg)
{
	char *cpus;

	return cgfs_get_value("cpuset", cg, "cpuset.cpus", &cpus) ? cpus : NULL;
}
2413 bool cpu_in_cpuset(int cpu
, const char *cpuset
);
/*
 * Given a "processor : N" line, report whether cpu N is a member of
 * @cpuset.  Lines that do not parse as a processor line yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1 &&
	       cpu_in_cpuset(cpu, cpuset);
}
/*
 * Check whether this is a "^processor : N" line as found in
 * /proc/cpuinfo.
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1;
}
2436 static int proc_cpuinfo_read(char *buf
, size_t size
, off_t offset
,
2437 struct fuse_file_info
*fi
)
2439 struct fuse_context
*fc
= fuse_get_context();
2440 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2442 char *cpuset
= NULL
;
2444 size_t linelen
= 0, total_len
= 0, rv
= 0;
2445 bool am_printing
= false;
2447 char *cache
= d
->buf
;
2448 size_t cache_size
= d
->buflen
;
2452 if (offset
> d
->size
)
2456 int left
= d
->size
- offset
;
2457 total_len
= left
> size
? size
: left
;
2458 memcpy(buf
, cache
+ offset
, total_len
);
2462 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2465 cg
= get_pid_cgroup(initpid
, "cpuset");
2467 return read_file("proc/cpuinfo", buf
, size
, d
);
2469 cpuset
= get_cpuset(cg
);
2473 f
= fopen("/proc/cpuinfo", "r");
2477 while (getline(&line
, &linelen
, f
) != -1) {
2479 if (is_processor_line(line
)) {
2480 am_printing
= cpuline_in_cpuset(line
, cpuset
);
2483 l
= snprintf(cache
, cache_size
, "processor : %d\n", curcpu
);
2485 perror("Error writing to cache");
2489 if (l
>= cache_size
) {
2490 fprintf(stderr
, "Internal error: truncated write to cache\n");
2494 if (l
< cache_size
){
2499 cache
+= cache_size
;
2500 total_len
+= cache_size
;
2508 l
= snprintf(cache
, cache_size
, "%s", line
);
2510 perror("Error writing to cache");
2514 if (l
>= cache_size
) {
2515 fprintf(stderr
, "Internal error: truncated write to cache\n");
2519 if (l
< cache_size
) {
2524 cache
+= cache_size
;
2525 total_len
+= cache_size
;
2533 d
->size
= total_len
;
2534 if (total_len
> size
) total_len
= size
;
2536 /* read from off 0 */
2537 memcpy(buf
, d
->buf
, total_len
);
2548 static int proc_stat_read(char *buf
, size_t size
, off_t offset
,
2549 struct fuse_file_info
*fi
)
2551 struct fuse_context
*fc
= fuse_get_context();
2552 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2554 char *cpuset
= NULL
;
2556 size_t linelen
= 0, total_len
= 0, rv
= 0;
2557 int curcpu
= -1; /* cpu numbering starts at 0 */
2558 unsigned long user
= 0, nice
= 0, system
= 0, idle
= 0, iowait
= 0, irq
= 0, softirq
= 0, steal
= 0, guest
= 0;
2559 unsigned long user_sum
= 0, nice_sum
= 0, system_sum
= 0, idle_sum
= 0, iowait_sum
= 0,
2560 irq_sum
= 0, softirq_sum
= 0, steal_sum
= 0, guest_sum
= 0;
2561 #define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2562 char cpuall
[CPUALL_MAX_SIZE
];
2563 /* reserve for cpu all */
2564 char *cache
= d
->buf
+ CPUALL_MAX_SIZE
;
2565 size_t cache_size
= d
->buflen
- CPUALL_MAX_SIZE
;
2569 if (offset
> d
->size
)
2573 int left
= d
->size
- offset
;
2574 total_len
= left
> size
? size
: left
;
2575 memcpy(buf
, d
->buf
+ offset
, total_len
);
2579 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2582 cg
= get_pid_cgroup(initpid
, "cpuset");
2584 return read_file("/proc/stat", buf
, size
, d
);
2586 cpuset
= get_cpuset(cg
);
2590 f
= fopen("/proc/stat", "r");
2595 if (getline(&line
, &linelen
, f
) < 0) {
2596 fprintf(stderr
, "proc_stat_read read first line failed\n");
2600 while (getline(&line
, &linelen
, f
) != -1) {
2603 char cpu_char
[10]; /* That's a lot of cores */
2606 if (sscanf(line
, "cpu%9[^ ]", cpu_char
) != 1) {
2607 /* not a ^cpuN line containing a number N, just print it */
2608 l
= snprintf(cache
, cache_size
, "%s", line
);
2610 perror("Error writing to cache");
2614 if (l
>= cache_size
) {
2615 fprintf(stderr
, "Internal error: truncated write to cache\n");
2619 if (l
< cache_size
) {
2625 //no more space, break it
2626 cache
+= cache_size
;
2627 total_len
+= cache_size
;
2633 if (sscanf(cpu_char
, "%d", &cpu
) != 1)
2635 if (!cpu_in_cpuset(cpu
, cpuset
))
2639 c
= strchr(line
, ' ');
2642 l
= snprintf(cache
, cache_size
, "cpu%d%s", curcpu
, c
);
2644 perror("Error writing to cache");
2649 if (l
>= cache_size
) {
2650 fprintf(stderr
, "Internal error: truncated write to cache\n");
2659 if (sscanf(line
, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user
, &nice
, &system
, &idle
, &iowait
, &irq
,
2660 &softirq
, &steal
, &guest
) != 9)
2664 system_sum
+= system
;
2666 iowait_sum
+= iowait
;
2668 softirq_sum
+= softirq
;
2675 int cpuall_len
= snprintf(cpuall
, CPUALL_MAX_SIZE
, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
2676 "cpu ", user_sum
, nice_sum
, system_sum
, idle_sum
, iowait_sum
, irq_sum
, softirq_sum
, steal_sum
, guest_sum
);
2677 if (cpuall_len
> 0 && cpuall_len
< CPUALL_MAX_SIZE
){
2678 memcpy(cache
, cpuall
, cpuall_len
);
2679 cache
+= cpuall_len
;
2681 /* shouldn't happen */
2682 fprintf(stderr
, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len
);
2686 memmove(cache
, d
->buf
+ CPUALL_MAX_SIZE
, total_len
);
2687 total_len
+= cpuall_len
;
2689 d
->size
= total_len
;
2690 if (total_len
> size
) total_len
= size
;
2692 memcpy(buf
, d
->buf
, total_len
);
/*
 * Return the age, in seconds, of the reaper of @pid: the difference
 * between now and the ctime of /proc/<reaper pid>.
 *
 * Returns 0 when the reaper cannot be looked up or stat'ed.
 */
static long int getreaperage(pid_t pid)
{
	char procdir[100];
	struct stat st;
	pid_t reaper;
	int n;

	reaper = lookup_initpid_in_store(pid);
	if (reaper <= 0)
		return 0;

	n = snprintf(procdir, sizeof(procdir), "/proc/%d", reaper);
	if (n < 0 || n >= (int)sizeof(procdir))
		return 0;

	if (lstat(procdir, &st) < 0)
		return 0;

	return time(NULL) - st.st_ctime;
}
/*
 * Return cpuacct.usage of the cgroup holding the reaper of @task,
 * divided by 1000000000.
 *
 * Returns 0 when the reaper, its cpuacct cgroup or the usage value
 * cannot be obtained.
 */
static unsigned long get_reaper_busy(pid_t task)
{
	pid_t initpid = lookup_initpid_in_store(task);
	char *cg = NULL, *raw = NULL;
	unsigned long busy = 0;

	if (initpid <= 0)
		return 0;

	cg = get_pid_cgroup(initpid, "cpuacct");
	if (cg && cgfs_get_value("cpuacct", cg, "cpuacct.usage", &raw))
		busy = strtoul(raw, NULL, 10) / 1000000000;

	free(cg);
	free(raw);
	return busy;
}
2749 * We read /proc/uptime and reuse its second field.
2750 * For the first field, we use the mtime for the reaper for
2751 * the calling pid as returned by getreaperage
2753 static int proc_uptime_read(char *buf
, size_t size
, off_t offset
,
2754 struct fuse_file_info
*fi
)
2756 struct fuse_context
*fc
= fuse_get_context();
2757 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2758 long int reaperage
= getreaperage(fc
->pid
);
2759 unsigned long int busytime
= get_reaper_busy(fc
->pid
), idletime
;
2760 char *cache
= d
->buf
;
2761 size_t total_len
= 0;
2764 if (offset
> d
->size
)
2768 int left
= d
->size
- offset
;
2769 total_len
= left
> size
? size
: left
;
2770 memcpy(buf
, cache
+ offset
, total_len
);
2774 idletime
= reaperage
- busytime
;
2775 if (idletime
> reaperage
)
2776 idletime
= reaperage
;
2778 total_len
= snprintf(d
->buf
, d
->size
, "%ld.0 %lu.0\n", reaperage
, idletime
);
2780 perror("Error writing to cache");
2784 d
->size
= (int)total_len
;
2787 if (total_len
> size
) total_len
= size
;
2789 memcpy(buf
, d
->buf
, total_len
);
2793 static int proc_diskstats_read(char *buf
, size_t size
, off_t offset
,
2794 struct fuse_file_info
*fi
)
2797 struct fuse_context
*fc
= fuse_get_context();
2798 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2800 char *io_serviced_str
= NULL
, *io_merged_str
= NULL
, *io_service_bytes_str
= NULL
,
2801 *io_wait_time_str
= NULL
, *io_service_time_str
= NULL
;
2802 unsigned long read
= 0, write
= 0;
2803 unsigned long read_merged
= 0, write_merged
= 0;
2804 unsigned long read_sectors
= 0, write_sectors
= 0;
2805 unsigned long read_ticks
= 0, write_ticks
= 0;
2806 unsigned long ios_pgr
= 0, tot_ticks
= 0, rq_ticks
= 0;
2807 unsigned long rd_svctm
= 0, wr_svctm
= 0, rd_wait
= 0, wr_wait
= 0;
2808 char *cache
= d
->buf
;
2809 size_t cache_size
= d
->buflen
;
2811 size_t linelen
= 0, total_len
= 0, rv
= 0;
2812 unsigned int major
= 0, minor
= 0;
2817 if (offset
> d
->size
)
2821 int left
= d
->size
- offset
;
2822 total_len
= left
> size
? size
: left
;
2823 memcpy(buf
, cache
+ offset
, total_len
);
2827 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2830 cg
= get_pid_cgroup(initpid
, "blkio");
2832 return read_file("/proc/diskstats", buf
, size
, d
);
2834 if (!cgfs_get_value("blkio", cg
, "blkio.io_serviced", &io_serviced_str
))
2836 if (!cgfs_get_value("blkio", cg
, "blkio.io_merged", &io_merged_str
))
2838 if (!cgfs_get_value("blkio", cg
, "blkio.io_service_bytes", &io_service_bytes_str
))
2840 if (!cgfs_get_value("blkio", cg
, "blkio.io_wait_time", &io_wait_time_str
))
2842 if (!cgfs_get_value("blkio", cg
, "blkio.io_service_time", &io_service_time_str
))
2846 f
= fopen("/proc/diskstats", "r");
2850 while (getline(&line
, &linelen
, f
) != -1) {
2852 char *printme
, lbuf
[256];
2854 i
= sscanf(line
, "%u %u %71s", &major
, &minor
, dev_name
);
2856 get_blkio_io_value(io_serviced_str
, major
, minor
, "Read", &read
);
2857 get_blkio_io_value(io_serviced_str
, major
, minor
, "Write", &write
);
2858 get_blkio_io_value(io_merged_str
, major
, minor
, "Read", &read_merged
);
2859 get_blkio_io_value(io_merged_str
, major
, minor
, "Write", &write_merged
);
2860 get_blkio_io_value(io_service_bytes_str
, major
, minor
, "Read", &read_sectors
);
2861 read_sectors
= read_sectors
/512;
2862 get_blkio_io_value(io_service_bytes_str
, major
, minor
, "Write", &write_sectors
);
2863 write_sectors
= write_sectors
/512;
2865 get_blkio_io_value(io_service_time_str
, major
, minor
, "Read", &rd_svctm
);
2866 rd_svctm
= rd_svctm
/1000000;
2867 get_blkio_io_value(io_wait_time_str
, major
, minor
, "Read", &rd_wait
);
2868 rd_wait
= rd_wait
/1000000;
2869 read_ticks
= rd_svctm
+ rd_wait
;
2871 get_blkio_io_value(io_service_time_str
, major
, minor
, "Write", &wr_svctm
);
2872 wr_svctm
= wr_svctm
/1000000;
2873 get_blkio_io_value(io_wait_time_str
, major
, minor
, "Write", &wr_wait
);
2874 wr_wait
= wr_wait
/1000000;
2875 write_ticks
= wr_svctm
+ wr_wait
;
2877 get_blkio_io_value(io_service_time_str
, major
, minor
, "Total", &tot_ticks
);
2878 tot_ticks
= tot_ticks
/1000000;
2883 memset(lbuf
, 0, 256);
2884 if (read
|| write
|| read_merged
|| write_merged
|| read_sectors
|| write_sectors
|| read_ticks
|| write_ticks
) {
2885 snprintf(lbuf
, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
2886 major
, minor
, dev_name
, read
, read_merged
, read_sectors
, read_ticks
,
2887 write
, write_merged
, write_sectors
, write_ticks
, ios_pgr
, tot_ticks
, rq_ticks
);
2892 l
= snprintf(cache
, cache_size
, "%s", printme
);
2894 perror("Error writing to fuse buf");
2898 if (l
>= cache_size
) {
2899 fprintf(stderr
, "Internal error: truncated write to cache\n");
2909 d
->size
= total_len
;
2910 if (total_len
> size
) total_len
= size
;
2911 memcpy(buf
, d
->buf
, total_len
);
2919 free(io_serviced_str
);
2920 free(io_merged_str
);
2921 free(io_service_bytes_str
);
2922 free(io_wait_time_str
);
2923 free(io_service_time_str
);
/*
 * Return the total number of bytes obtained by reading the file @which
 * line by line, or 0 when it cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	FILE *fp = fopen(which, "r");
	char *line = NULL;
	size_t cap = 0;
	ssize_t nread, total = 0;

	if (fp == NULL)
		return 0;

	for (;;) {
		nread = getline(&line, &cap, fp);
		if (nread == -1)
			break;
		total += nread;
	}

	fclose(fp);
	free(line);

	return total;
}
2944 static int proc_getattr(const char *path
, struct stat
*sb
)
2946 struct timespec now
;
2948 memset(sb
, 0, sizeof(struct stat
));
2949 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
2951 sb
->st_uid
= sb
->st_gid
= 0;
2952 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
2953 if (strcmp(path
, "/proc") == 0) {
2954 sb
->st_mode
= S_IFDIR
| 00555;
2958 if (strcmp(path
, "/proc/meminfo") == 0 ||
2959 strcmp(path
, "/proc/cpuinfo") == 0 ||
2960 strcmp(path
, "/proc/uptime") == 0 ||
2961 strcmp(path
, "/proc/stat") == 0 ||
2962 strcmp(path
, "/proc/diskstats") == 0) {
2964 sb
->st_mode
= S_IFREG
| 00444;
2972 static int proc_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
2973 struct fuse_file_info
*fi
)
2975 if (filler(buf
, "cpuinfo", NULL
, 0) != 0 ||
2976 filler(buf
, "meminfo", NULL
, 0) != 0 ||
2977 filler(buf
, "stat", NULL
, 0) != 0 ||
2978 filler(buf
, "uptime", NULL
, 0) != 0 ||
2979 filler(buf
, "diskstats", NULL
, 0) != 0)
2984 static int proc_open(const char *path
, struct fuse_file_info
*fi
)
2987 struct file_info
*info
;
2989 if (strcmp(path
, "/proc/meminfo") == 0)
2990 type
= LXC_TYPE_PROC_MEMINFO
;
2991 else if (strcmp(path
, "/proc/cpuinfo") == 0)
2992 type
= LXC_TYPE_PROC_CPUINFO
;
2993 else if (strcmp(path
, "/proc/uptime") == 0)
2994 type
= LXC_TYPE_PROC_UPTIME
;
2995 else if (strcmp(path
, "/proc/stat") == 0)
2996 type
= LXC_TYPE_PROC_STAT
;
2997 else if (strcmp(path
, "/proc/diskstats") == 0)
2998 type
= LXC_TYPE_PROC_DISKSTATS
;
3002 info
= malloc(sizeof(*info
));
3006 memset(info
, 0, sizeof(*info
));
3009 info
->buflen
= get_procfile_size(path
) + BUF_RESERVE_SIZE
;
3011 info
->buf
= malloc(info
->buflen
);
3012 } while (!info
->buf
);
3013 memset(info
->buf
, 0, info
->buflen
);
3014 /* set actual size to buffer size */
3015 info
->size
= info
->buflen
;
3017 fi
->fh
= (unsigned long)info
;
3021 static int proc_release(const char *path
, struct fuse_file_info
*fi
)
3023 struct file_info
*f
= (struct file_info
*)fi
->fh
;
3025 do_release_file_info(f
);
3029 static int proc_read(const char *path
, char *buf
, size_t size
, off_t offset
,
3030 struct fuse_file_info
*fi
)
3032 struct file_info
*f
= (struct file_info
*) fi
->fh
;
3035 case LXC_TYPE_PROC_MEMINFO
:
3036 return proc_meminfo_read(buf
, size
, offset
, fi
);
3037 case LXC_TYPE_PROC_CPUINFO
:
3038 return proc_cpuinfo_read(buf
, size
, offset
, fi
);
3039 case LXC_TYPE_PROC_UPTIME
:
3040 return proc_uptime_read(buf
, size
, offset
, fi
);
3041 case LXC_TYPE_PROC_STAT
:
3042 return proc_stat_read(buf
, size
, offset
, fi
);
3043 case LXC_TYPE_PROC_DISKSTATS
:
3044 return proc_diskstats_read(buf
, size
, offset
, fi
);
3052 * these just delegate to the /proc and /cgroup ops as
3056 static int lxcfs_getattr(const char *path
, struct stat
*sb
)
3058 if (strcmp(path
, "/") == 0) {
3059 sb
->st_mode
= S_IFDIR
| 00755;
3063 if (strncmp(path
, "/cgroup", 7) == 0) {
3064 return cg_getattr(path
, sb
);
3066 if (strncmp(path
, "/proc", 5) == 0) {
3067 return proc_getattr(path
, sb
);
3072 static int lxcfs_opendir(const char *path
, struct fuse_file_info
*fi
)
3074 if (strcmp(path
, "/") == 0)
3077 if (strncmp(path
, "/cgroup", 7) == 0) {
3078 return cg_opendir(path
, fi
);
3080 if (strcmp(path
, "/proc") == 0)
3085 static int lxcfs_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
3086 struct fuse_file_info
*fi
)
3088 if (strcmp(path
, "/") == 0) {
3089 if (filler(buf
, "proc", NULL
, 0) != 0 ||
3090 filler(buf
, "cgroup", NULL
, 0) != 0)
3094 if (strncmp(path
, "/cgroup", 7) == 0)
3095 return cg_readdir(path
, buf
, filler
, offset
, fi
);
3096 if (strcmp(path
, "/proc") == 0)
3097 return proc_readdir(path
, buf
, filler
, offset
, fi
);
3101 static int lxcfs_releasedir(const char *path
, struct fuse_file_info
*fi
)
3103 if (strcmp(path
, "/") == 0)
3105 if (strncmp(path
, "/cgroup", 7) == 0) {
3106 return cg_releasedir(path
, fi
);
3108 if (strcmp(path
, "/proc") == 0)
3113 static int lxcfs_open(const char *path
, struct fuse_file_info
*fi
)
3115 if (strncmp(path
, "/cgroup", 7) == 0)
3116 return cg_open(path
, fi
);
3117 if (strncmp(path
, "/proc", 5) == 0)
3118 return proc_open(path
, fi
);
3123 static int lxcfs_read(const char *path
, char *buf
, size_t size
, off_t offset
,
3124 struct fuse_file_info
*fi
)
3126 if (strncmp(path
, "/cgroup", 7) == 0)
3127 return cg_read(path
, buf
, size
, offset
, fi
);
3128 if (strncmp(path
, "/proc", 5) == 0)
3129 return proc_read(path
, buf
, size
, offset
, fi
);
3134 int lxcfs_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
3135 struct fuse_file_info
*fi
)
3137 if (strncmp(path
, "/cgroup", 7) == 0) {
3138 return cg_write(path
, buf
, size
, offset
, fi
);
3144 static int lxcfs_flush(const char *path
, struct fuse_file_info
*fi
)
3149 static int lxcfs_release(const char *path
, struct fuse_file_info
*fi
)
3151 if (strncmp(path
, "/cgroup", 7) == 0)
3152 return cg_release(path
, fi
);
3153 if (strncmp(path
, "/proc", 5) == 0)
3154 return proc_release(path
, fi
);
3159 static int lxcfs_fsync(const char *path
, int datasync
, struct fuse_file_info
*fi
)
3164 int lxcfs_mkdir(const char *path
, mode_t mode
)
3166 if (strncmp(path
, "/cgroup", 7) == 0)
3167 return cg_mkdir(path
, mode
);
3172 int lxcfs_chown(const char *path
, uid_t uid
, gid_t gid
)
3174 if (strncmp(path
, "/cgroup", 7) == 0)
3175 return cg_chown(path
, uid
, gid
);
3181 * cat first does a truncate before doing ops->write. This doesn't
3182 * really make sense for cgroups. So just return 0 always but do
3185 int lxcfs_truncate(const char *path
, off_t newsize
)
3187 if (strncmp(path
, "/cgroup", 7) == 0)
3192 int lxcfs_rmdir(const char *path
)
3194 if (strncmp(path
, "/cgroup", 7) == 0)
3195 return cg_rmdir(path
);
3199 int lxcfs_chmod(const char *path
, mode_t mode
)
3201 if (strncmp(path
, "/cgroup", 7) == 0)
3202 return cg_chmod(path
, mode
);
3206 const struct fuse_operations lxcfs_ops
= {
3207 .getattr
= lxcfs_getattr
,
3211 .mkdir
= lxcfs_mkdir
,
3213 .rmdir
= lxcfs_rmdir
,
3217 .chmod
= lxcfs_chmod
,
3218 .chown
= lxcfs_chown
,
3219 .truncate
= lxcfs_truncate
,
3224 .release
= lxcfs_release
,
3225 .write
= lxcfs_write
,
3228 .flush
= lxcfs_flush
,
3229 .fsync
= lxcfs_fsync
,
3234 .removexattr
= NULL
,
3236 .opendir
= lxcfs_opendir
,
3237 .readdir
= lxcfs_readdir
,
3238 .releasedir
= lxcfs_releasedir
,
/*
 * Print the command-line synopsis for program name @me to stderr and
 * terminate.
 *
 * NOTE(review): the terminating exit(1) is not visible in the scraped
 * listing (one line is missing after the last fprintf) - confirm.
 */
static void usage(const char *me)
{
	fputs("Usage:\n", stderr);
	fputs("\n", stderr);
	fprintf(stderr, "%s mountpoint\n", me);
	fprintf(stderr, "%s -h\n", me);
	exit(1);
}
/*
 * Report whether the argument @w is one of the recognized spellings of
 * a help request.
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = { "-h", "--help", "-help", "help" };
	size_t i;

	for (i = 0; i < sizeof(spellings) / sizeof(spellings[0]); i++) {
		if (strcmp(w, spellings[i]) == 0)
			return true;
	}
	return false;
}
/*
 * Remove the first occurrence of the argument @which from the
 * NULL-terminated vector @argv (argv[0] is never considered) and
 * decrement *@argcp.  Nothing changes when @which is not present.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	int pos = 1;

	/* find the first matching argument, if any */
	while (argv[pos] != NULL && strcmp(argv[pos], which) != 0)
		pos++;
	if (argv[pos] == NULL)
		return;

	/* slide the tail, including the terminating NULL, one slot left */
	for (; argv[pos] != NULL; pos++)
		argv[pos] = argv[pos + 1];

	(*argcp)--;
}
/*
 * Remove the first "@opt <value>" pair from the NULL-terminated vector
 * @argv (argv[0] is never considered) and reduce *@argcp by two.
 *
 * The value following @opt must equal @v; any other value is reported
 * on stderr and the process exits.  Nothing changes when @opt is not
 * present or is the last argument.
 *
 * NOTE(review): the exit(1) after the diagnostic and the guard for a
 * missing value are not visible in the scraped listing; they are
 * restored from the line-number gaps - confirm against the repository.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	for (i = 1; argv[i]; i++) {
		/* an option in last position has no value to inspect */
		if (!argv[i + 1])
			continue;
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i + 1], v) != 0) {
			/* report the value actually found, not the expected one */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i + 1]);
			exit(1);
		}
		/* slide the tail, including the terminating NULL, two slots left */
		for (; argv[i + 1]; i++)
			argv[i] = argv[i + 2];
		(*argcp) -= 2;
		return;
	}
}
3304 int main(int argc
, char *argv
[])
3308 * what we pass to fuse_main is:
3309 * argv[0] -s -f -o allow_other,directio argv[1] NULL
3311 int nargs
= 5, cnt
= 0;
3315 /* for travis which runs on 12.04 */
3316 if (glib_check_version (2, 36, 0) != NULL
)
3320 /* accomodate older init scripts */
3321 swallow_arg(&argc
, argv
, "-s");
3322 swallow_arg(&argc
, argv
, "-f");
3323 swallow_option(&argc
, argv
, "-o", "allow_other");
3325 if (argc
== 2 && strcmp(argv
[1], "--version") == 0) {
3326 fprintf(stderr
, "%s\n", VERSION
);
3329 if (argc
!= 2 || is_help(argv
[1]))
3332 newargv
[cnt
++] = argv
[0];
3333 newargv
[cnt
++] = "-f";
3334 newargv
[cnt
++] = "-o";
3335 newargv
[cnt
++] = "allow_other,direct_io,entry_timeout=0.5,attr_timeout=0.5";
3336 newargv
[cnt
++] = argv
[1];
3337 newargv
[cnt
++] = NULL
;
3339 if (!cgfs_setup_controllers())
3342 ret
= fuse_main(nargs
, newargv
, &lxcfs_ops
, NULL
);