3 * Copyright © 2014-2016 Canonical, Inc
4 * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
6 * See COPYING file for details.
9 #define FUSE_USE_VERSION 26
24 #include <linux/sched.h>
25 #include <sys/socket.h>
26 #include <sys/mount.h>
27 #include <sys/epoll.h>
31 #define GLIB_DISABLE_DEPRECATION_WARNINGS
32 #include <glib-object.h>
36 #include "config.h" // for VERSION
41 LXC_TYPE_PROC_MEMINFO
,
42 LXC_TYPE_PROC_CPUINFO
,
45 LXC_TYPE_PROC_DISKSTATS
,
53 char *buf
; // unused as of yet
55 int size
; //actual data size
59 /* reserve buffer size, for cpuall in /proc/stat */
60 #define BUF_RESERVE_SIZE 256
63 * A table caching which pid is init for a pid namespace.
64 * When looking up which pid is init for $qpid, we first
65 * 1. Stat /proc/$qpid/ns/pid.
66 * 2. Check whether the ino_t is in our store.
67 * a. if not, fork a child in qpid's ns to send us
68 * ucred.pid = 1, and read the initpid. Cache
69 * initpid and creation time for /proc/initpid
70 * in a new store entry.
71 * b. if so, verify that /proc/initpid still matches
72 * what we have saved. If not, clear the store
73 * entry and go back to a. If so, return the
76 struct pidns_init_store
{
77 ino_t ino
; // inode number for /proc/$pid/ns/pid
78 pid_t initpid
; // the pid of init in that ns
79 long int ctime
; // the time at which /proc/$initpid was created
80 struct pidns_init_store
*next
;
84 /* lol - look at how they are allocated in the kernel */
85 #define PIDNS_HASH_SIZE 4096
86 #define HASH(x) ((x) % PIDNS_HASH_SIZE)
88 struct pidns_init_store
*pidns_hash_table
[PIDNS_HASH_SIZE
];
89 static pthread_mutex_t pidns_store_mutex
= PTHREAD_MUTEX_INITIALIZER
;
90 static void lock_mutex(pthread_mutex_t
*l
)
94 if ((ret
= pthread_mutex_lock(l
)) != 0) {
95 fprintf(stderr
, "pthread_mutex_lock returned:%d %s\n", ret
, strerror(ret
));
100 static void unlock_mutex(pthread_mutex_t
*l
)
104 if ((ret
= pthread_mutex_unlock(l
)) != 0) {
105 fprintf(stderr
, "pthread_mutex_unlock returned:%d %s\n", ret
, strerror(ret
));
110 static void store_lock(void)
112 lock_mutex(&pidns_store_mutex
);
115 static void store_unlock(void)
117 unlock_mutex(&pidns_store_mutex
);
120 /* Must be called under store_lock */
121 static bool initpid_still_valid(struct pidns_init_store
*e
, struct stat
*nsfdsb
)
126 snprintf(fnam
, 100, "/proc/%d", e
->initpid
);
127 if (stat(fnam
, &initsb
) < 0)
130 fprintf(stderr
, "comparing ctime %ld %ld for pid %d\n",
131 e
->ctime
, initsb
.st_ctime
, e
->initpid
);
133 if (e
->ctime
!= initsb
.st_ctime
)
138 /* Must be called under store_lock */
139 static void remove_initpid(struct pidns_init_store
*e
)
141 struct pidns_init_store
*tmp
;
145 fprintf(stderr
, "remove_initpid: removing entry for %d\n", e
->initpid
);
148 if (pidns_hash_table
[h
] == e
) {
149 pidns_hash_table
[h
] = e
->next
;
154 tmp
= pidns_hash_table
[h
];
156 if (tmp
->next
== e
) {
166 /* Must be called under store_lock */
167 static void prune_initpid_store(void)
169 static long int last_prune
= 0;
170 struct pidns_init_store
*e
, *prev
, *delme
;
171 long int now
, threshold
;
175 last_prune
= time(NULL
);
179 if (now
< last_prune
+ PURGE_SECS
)
182 fprintf(stderr
, "pruning\n");
185 threshold
= now
- 2 * PURGE_SECS
;
187 for (i
= 0; i
< PIDNS_HASH_SIZE
; i
++) {
188 for (prev
= NULL
, e
= pidns_hash_table
[i
]; e
; ) {
189 if (e
->lastcheck
< threshold
) {
191 fprintf(stderr
, "Removing cached entry for %d\n", e
->initpid
);
195 prev
->next
= e
->next
;
197 pidns_hash_table
[i
] = e
->next
;
208 /* Must be called under store_lock */
209 static void save_initpid(struct stat
*sb
, pid_t pid
)
211 struct pidns_init_store
*e
;
217 fprintf(stderr
, "save_initpid: adding entry for %d\n", pid
);
219 snprintf(fpath
, 100, "/proc/%d", pid
);
220 if (stat(fpath
, &procsb
) < 0)
223 e
= malloc(sizeof(*e
));
227 e
->ctime
= procsb
.st_ctime
;
229 e
->next
= pidns_hash_table
[h
];
230 e
->lastcheck
= time(NULL
);
231 pidns_hash_table
[h
] = e
;
235 * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
236 * entry for the inode number and creation time. Verify that the init pid
237 * is still valid. If not, remove it. Return the entry if valid, NULL
239 * Must be called under store_lock
241 static struct pidns_init_store
*lookup_verify_initpid(struct stat
*sb
)
243 int h
= HASH(sb
->st_ino
);
244 struct pidns_init_store
*e
= pidns_hash_table
[h
];
247 if (e
->ino
== sb
->st_ino
) {
248 if (initpid_still_valid(e
, sb
)) {
249 e
->lastcheck
= time(NULL
);
261 #define SEND_CREDS_OK 0
262 #define SEND_CREDS_NOTSK 1
263 #define SEND_CREDS_FAIL 2
264 static bool recv_creds(int sock
, struct ucred
*cred
, char *v
);
265 static int wait_for_pid(pid_t pid
);
266 static int send_creds(int sock
, struct ucred
*cred
, char v
, bool pingfirst
);
269 * fork a task which switches to @task's namespace and writes '1'.
270 * over a unix sock so we can read the task's reaper's pid in our
273 static void write_task_init_pid_exit(int sock
, pid_t target
)
281 ret
= snprintf(fnam
, sizeof(fnam
), "/proc/%d/ns/pid", (int)target
);
282 if (ret
< 0 || ret
>= sizeof(fnam
))
285 fd
= open(fnam
, O_RDONLY
);
287 perror("write_task_init_pid_exit open of ns/pid");
291 perror("write_task_init_pid_exit setns 1");
299 if (!wait_for_pid(pid
))
304 /* we are the child */
309 if (send_creds(sock
, &cred
, v
, true) != SEND_CREDS_OK
)
314 static pid_t
get_init_pid_for_task(pid_t task
)
322 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sock
) < 0) {
323 perror("socketpair");
332 write_task_init_pid_exit(sock
[0], task
);
336 if (!recv_creds(sock
[1], &cred
, &v
))
348 static pid_t
lookup_initpid_in_store(pid_t qpid
)
352 struct pidns_init_store
*e
;
355 snprintf(fnam
, 100, "/proc/%d/ns/pid", qpid
);
357 if (stat(fnam
, &sb
) < 0)
359 e
= lookup_verify_initpid(&sb
);
364 answer
= get_init_pid_for_task(qpid
);
366 save_initpid(&sb
, answer
);
369 /* we prune at end in case we are returning
370 * the value we were about to return */
371 prune_initpid_store();
376 static int wait_for_pid(pid_t pid
)
384 ret
= waitpid(pid
, &status
, 0);
392 if (!WIFEXITED(status
) || WEXITSTATUS(status
) != 0)
399 * append pid to *src.
400 * src: a pointer to a char* in which to append the pid.
401 * sz: the number of characters printed so far, minus trailing \0.
402 * asz: the allocated size so far
403 * pid: the pid to append
405 static void must_strcat_pid(char **src
, size_t *sz
, size_t *asz
, pid_t pid
)
409 int tmplen
= sprintf(tmp
, "%d\n", (int)pid
);
411 if (!*src
|| tmplen
+ *sz
+ 1 >= *asz
) {
414 tmp
= realloc(*src
, *asz
+ BUF_RESERVE_SIZE
);
417 *asz
+= BUF_RESERVE_SIZE
;
419 memcpy((*src
) +*sz
, tmp
, tmplen
);
425 * Given a open file * to /proc/pid/{u,g}id_map, and an id
426 * valid in the caller's namespace, return the id mapped into
428 * Returns the mapped id, or -1 on error.
431 convert_id_to_ns(FILE *idfile
, unsigned int in_id
)
433 unsigned int nsuid
, // base id for a range in the idfile's namespace
434 hostuid
, // base id for a range in the caller's namespace
435 count
; // number of ids in this range
439 fseek(idfile
, 0L, SEEK_SET
);
440 while (fgets(line
, 400, idfile
)) {
441 ret
= sscanf(line
, "%u %u %u\n", &nsuid
, &hostuid
, &count
);
444 if (hostuid
+ count
< hostuid
|| nsuid
+ count
< nsuid
) {
446 * uids wrapped around - unexpected as this is a procfile,
449 fprintf(stderr
, "pid wrapparound at entry %u %u %u in %s\n",
450 nsuid
, hostuid
, count
, line
);
453 if (hostuid
<= in_id
&& hostuid
+count
> in_id
) {
455 * now since hostuid <= in_id < hostuid+count, and
456 * hostuid+count and nsuid+count do not wrap around,
457 * we know that nsuid+(in_id-hostuid) which must be
458 * less than nsuid+(count) must not wrap around
460 return (in_id
- hostuid
) + nsuid
;
469 * for is_privileged_over,
470 * specify whether we require the calling uid to be root in his
473 #define NS_ROOT_REQD true
474 #define NS_ROOT_OPT false
478 static bool is_privileged_over(pid_t pid
, uid_t uid
, uid_t victim
, bool req_ns_root
)
485 if (victim
== -1 || uid
== -1)
489 * If the request is one not requiring root in the namespace,
490 * then having the same uid suffices. (i.e. uid 1000 has write
491 * access to files owned by uid 1000
493 if (!req_ns_root
&& uid
== victim
)
496 ret
= snprintf(fpath
, PROCLEN
, "/proc/%d/uid_map", pid
);
497 if (ret
< 0 || ret
>= PROCLEN
)
499 FILE *f
= fopen(fpath
, "r");
503 /* if caller's not root in his namespace, reject */
504 nsuid
= convert_id_to_ns(f
, uid
);
509 * If victim is not mapped into caller's ns, reject.
510 * XXX I'm not sure this check is needed given that fuse
511 * will be sending requests where the vfs has converted
513 nsuid
= convert_id_to_ns(f
, victim
);
524 static bool perms_include(int fmode
, mode_t req_mode
)
528 switch (req_mode
& O_ACCMODE
) {
536 r
= S_IROTH
| S_IWOTH
;
541 return ((fmode
& r
) == r
);
547 * querycg is /a/b/c/d/e
550 static char *get_next_cgroup_dir(const char *taskcg
, const char *querycg
)
554 if (strlen(taskcg
) <= strlen(querycg
)) {
555 fprintf(stderr
, "%s: I was fed bad input\n", __func__
);
559 if (strcmp(querycg
, "/") == 0)
560 start
= strdup(taskcg
+ 1);
562 start
= strdup(taskcg
+ strlen(querycg
) + 1);
565 end
= strchr(start
, '/');
571 static void stripnewline(char *x
)
573 size_t l
= strlen(x
);
574 if (l
&& x
[l
-1] == '\n')
578 static char *get_pid_cgroup(pid_t pid
, const char *contrl
)
586 const char *h
= find_mounted_controller(contrl
);
590 ret
= snprintf(fnam
, PROCLEN
, "/proc/%d/cgroup", pid
);
591 if (ret
< 0 || ret
>= PROCLEN
)
593 if (!(f
= fopen(fnam
, "r")))
596 while (getline(&line
, &len
, f
) != -1) {
600 c1
= strchr(line
, ':');
604 c2
= strchr(c1
, ':');
608 if (strcmp(c1
, h
) != 0)
625 * check whether a fuse context may access a cgroup dir or file
627 * If file is not null, it is a cgroup file to check under cg.
628 * If file is null, then we are checking perms on cg itself.
630 * For files we can check the mode of the list_keys result.
631 * For cgroups, we must make assumptions based on the files under the
632 * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups
635 static bool fc_may_access(struct fuse_context
*fc
, const char *contrl
, const char *cg
, const char *file
, mode_t mode
)
637 struct cgfs_files
*k
= NULL
;
640 k
= cgfs_get_key(contrl
, cg
, file
);
644 if (is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_OPT
)) {
645 if (perms_include(k
->mode
>> 6, mode
)) {
650 if (fc
->gid
== k
->gid
) {
651 if (perms_include(k
->mode
>> 3, mode
)) {
656 ret
= perms_include(k
->mode
, mode
);
663 #define INITSCOPE "/init.scope"
664 static void prune_init_slice(char *cg
)
667 point
= cg
+ strlen(cg
) - strlen(INITSCOPE
);
670 if (strcmp(point
, INITSCOPE
) == 0) {
679 * If pid is in /a/b/c/d, he may only act on things under cg=/a/b/c/d.
680 * If pid is in /a, he may act on /a/b, but not on /b.
681 * if the answer is false and nextcg is not NULL, then *nextcg will point
682 * to a string containing the next cgroup directory under cg, which must be
683 * freed by the caller.
685 static bool caller_is_in_ancestor(pid_t pid
, const char *contrl
, const char *cg
, char **nextcg
)
688 char *c2
= get_pid_cgroup(pid
, contrl
);
693 prune_init_slice(c2
);
696 * callers pass in '/' for root cgroup, otherwise they pass
697 * in a cgroup without leading '/'
699 linecmp
= *cg
== '/' ? c2
: c2
+1;
700 if (strncmp(linecmp
, cg
, strlen(linecmp
)) != 0) {
702 *nextcg
= get_next_cgroup_dir(linecmp
, cg
);
714 * If pid is in /a/b/c, he may see that /a exists, but not /b or /a/c.
716 static bool caller_may_see_dir(pid_t pid
, const char *contrl
, const char *cg
)
720 size_t target_len
, task_len
;
722 if (strcmp(cg
, "/") == 0)
725 c2
= get_pid_cgroup(pid
, contrl
);
728 prune_init_slice(c2
);
731 target_len
= strlen(cg
);
732 task_len
= strlen(task_cg
);
734 /* Task is in the root cg, it can see everything. This case is
735 * not handled by the strcmps below, since they test for the
736 * last /, but that is the first / that we've chopped off
742 if (strcmp(cg
, task_cg
) == 0) {
746 if (target_len
< task_len
) {
747 /* looking up a parent dir */
748 if (strncmp(task_cg
, cg
, target_len
) == 0 && task_cg
[target_len
] == '/')
752 if (target_len
> task_len
) {
753 /* looking up a child dir */
754 if (strncmp(task_cg
, cg
, task_len
) == 0 && cg
[task_len
] == '/')
765 * given /cgroup/freezer/a/b, return "freezer".
766 * the returned char* should NOT be freed.
768 static char *pick_controller_from_path(struct fuse_context
*fc
, const char *path
)
773 if (strlen(path
) < 9)
775 if (*(path
+7) != '/')
781 slash
= strstr(contr
, "/");
786 for (i
= 0; i
< num_hierarchies
; i
++) {
787 if (hierarchies
[i
] && strcmp(hierarchies
[i
], contr
) == 0)
788 return hierarchies
[i
];
794 * Find the start of cgroup in /cgroup/controller/the/cgroup/path
795 * Note that the returned value may include files (keynames) etc
797 static const char *find_cgroup_in_path(const char *path
)
801 if (strlen(path
) < 9)
803 p1
= strstr(path
+8, "/");
810 * split the last path element from the path in @cg.
811 * @dir is newly allocated and should be freed, @last not
813 static void get_cgdir_and_path(const char *cg
, char **dir
, char **last
)
820 *last
= strrchr(cg
, '/');
825 p
= strrchr(*dir
, '/');
830 * FUSE ops for /cgroup
833 static int cg_getattr(const char *path
, struct stat
*sb
)
836 struct fuse_context
*fc
= fuse_get_context();
838 char *last
= NULL
, *path1
, *path2
;
839 struct cgfs_files
*k
= NULL
;
841 const char *controller
= NULL
;
848 memset(sb
, 0, sizeof(struct stat
));
850 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
853 sb
->st_uid
= sb
->st_gid
= 0;
854 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
857 if (strcmp(path
, "/cgroup") == 0) {
858 sb
->st_mode
= S_IFDIR
| 00755;
863 controller
= pick_controller_from_path(fc
, path
);
866 cgroup
= find_cgroup_in_path(path
);
868 /* this is just /cgroup/controller, return it as a dir */
869 sb
->st_mode
= S_IFDIR
| 00755;
874 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
884 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
887 /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys.
888 * Then check that caller's cgroup is under path if last is a child
889 * cgroup, or cgdir if last is a file */
891 if (is_child_cgroup(controller
, path1
, path2
)) {
892 if (!caller_may_see_dir(initpid
, controller
, cgroup
)) {
896 if (!caller_is_in_ancestor(initpid
, controller
, cgroup
, NULL
)) {
897 /* caller is not in an ancestor of this cgroup, return it as a read-only dir */
898 sb
->st_mode
= S_IFDIR
| 00555;
903 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
)) {
908 // get uid, gid, from '/tasks' file and make up a mode
909 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
910 sb
->st_mode
= S_IFDIR
| 00755;
911 k
= cgfs_get_key(controller
, cgroup
, NULL
);
913 sb
->st_uid
= sb
->st_gid
= 0;
924 if ((k
= cgfs_get_key(controller
, path1
, path2
)) != NULL
) {
925 sb
->st_mode
= S_IFREG
| k
->mode
;
931 if (!caller_is_in_ancestor(initpid
, controller
, path1
, NULL
)) {
935 if (!fc_may_access(fc
, controller
, path1
, path2
, O_RDONLY
)) {
948 static int cg_opendir(const char *path
, struct fuse_file_info
*fi
)
950 struct fuse_context
*fc
= fuse_get_context();
952 struct file_info
*dir_info
;
953 char *controller
= NULL
;
958 if (strcmp(path
, "/cgroup") == 0) {
962 // return list of keys for the controller, and list of child cgroups
963 controller
= pick_controller_from_path(fc
, path
);
967 cgroup
= find_cgroup_in_path(path
);
969 /* this is just /cgroup/controller, return its contents */
974 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
978 if (!caller_may_see_dir(initpid
, controller
, cgroup
))
980 if (!fc_may_access(fc
, controller
, cgroup
, NULL
, O_RDONLY
))
984 /* we'll free this at cg_releasedir */
985 dir_info
= malloc(sizeof(*dir_info
));
988 dir_info
->controller
= must_copy_string(controller
);
989 dir_info
->cgroup
= must_copy_string(cgroup
);
990 dir_info
->type
= LXC_TYPE_CGDIR
;
991 dir_info
->buf
= NULL
;
992 dir_info
->file
= NULL
;
993 dir_info
->buflen
= 0;
995 fi
->fh
= (unsigned long)dir_info
;
999 static int cg_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
1000 struct fuse_file_info
*fi
)
1002 struct file_info
*d
= (struct file_info
*)fi
->fh
;
1003 struct cgfs_files
**list
= NULL
;
1005 char *nextcg
= NULL
;
1006 struct fuse_context
*fc
= fuse_get_context();
1007 char **clist
= NULL
;
1009 if (d
->type
!= LXC_TYPE_CGDIR
) {
1010 fprintf(stderr
, "Internal error: file cache info used in readdir\n");
1013 if (!d
->cgroup
&& !d
->controller
) {
1014 // ls /var/lib/lxcfs/cgroup - just show list of controllers
1017 for (i
= 0; i
< num_hierarchies
; i
++) {
1018 if (hierarchies
[i
] && filler(buf
, hierarchies
[i
], NULL
, 0) != 0) {
1025 if (!cgfs_list_keys(d
->controller
, d
->cgroup
, &list
)) {
1026 // not a valid cgroup
1031 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
1034 if (!caller_is_in_ancestor(initpid
, d
->controller
, d
->cgroup
, &nextcg
)) {
1037 ret
= filler(buf
, nextcg
, NULL
, 0);
1048 for (i
= 0; list
[i
]; i
++) {
1049 if (filler(buf
, list
[i
]->name
, NULL
, 0) != 0) {
1055 // now get the list of child cgroups
1057 if (!cgfs_list_children(d
->controller
, d
->cgroup
, &clist
)) {
1061 for (i
= 0; clist
[i
]; i
++) {
1062 if (filler(buf
, clist
[i
], NULL
, 0) != 0) {
1072 for (i
= 0; clist
[i
]; i
++)
1079 static void do_release_file_info(struct file_info
*f
)
1083 free(f
->controller
);
1090 static int cg_releasedir(const char *path
, struct fuse_file_info
*fi
)
1092 struct file_info
*d
= (struct file_info
*)fi
->fh
;
1094 do_release_file_info(d
);
1098 static int cg_open(const char *path
, struct fuse_file_info
*fi
)
1101 char *last
= NULL
, *path1
, *path2
, * cgdir
= NULL
, *controller
;
1102 struct cgfs_files
*k
= NULL
;
1103 struct file_info
*file_info
;
1104 struct fuse_context
*fc
= fuse_get_context();
1110 controller
= pick_controller_from_path(fc
, path
);
1113 cgroup
= find_cgroup_in_path(path
);
1117 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
1126 k
= cgfs_get_key(controller
, path1
, path2
);
1133 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
1136 if (!caller_may_see_dir(initpid
, controller
, path1
)) {
1140 if (!fc_may_access(fc
, controller
, path1
, path2
, fi
->flags
)) {
1141 // should never get here
1146 /* we'll free this at cg_release */
1147 file_info
= malloc(sizeof(*file_info
));
1152 file_info
->controller
= must_copy_string(controller
);
1153 file_info
->cgroup
= must_copy_string(path1
);
1154 file_info
->file
= must_copy_string(path2
);
1155 file_info
->type
= LXC_TYPE_CGFILE
;
1156 file_info
->buf
= NULL
;
1157 file_info
->buflen
= 0;
1159 fi
->fh
= (unsigned long)file_info
;
1167 static int cg_release(const char *path
, struct fuse_file_info
*fi
)
1169 struct file_info
*f
= (struct file_info
*)fi
->fh
;
1171 do_release_file_info(f
);
1175 #define POLLIN_SET ( EPOLLIN | EPOLLHUP | EPOLLRDHUP )
1177 static bool wait_for_sock(int sock
, int timeout
)
1179 struct epoll_event ev
;
1180 int epfd
, ret
, now
, starttime
, deltatime
, saved_errno
;
1182 if ((starttime
= time(NULL
)) < 0)
1185 if ((epfd
= epoll_create(1)) < 0) {
1186 fprintf(stderr
, "Failed to create epoll socket: %m\n");
1190 ev
.events
= POLLIN_SET
;
1192 if (epoll_ctl(epfd
, EPOLL_CTL_ADD
, sock
, &ev
) < 0) {
1193 fprintf(stderr
, "Failed adding socket to epoll: %m\n");
1199 if ((now
= time(NULL
)) < 0) {
1204 deltatime
= (starttime
+ timeout
) - now
;
1205 if (deltatime
< 0) { // timeout
1210 ret
= epoll_wait(epfd
, &ev
, 1, 1000*deltatime
+ 1);
1211 if (ret
< 0 && errno
== EINTR
)
1213 saved_errno
= errno
;
1217 errno
= saved_errno
;
1223 static int msgrecv(int sockfd
, void *buf
, size_t len
)
1225 if (!wait_for_sock(sockfd
, 2))
1227 return recv(sockfd
, buf
, len
, MSG_DONTWAIT
);
1230 static int send_creds(int sock
, struct ucred
*cred
, char v
, bool pingfirst
)
1232 struct msghdr msg
= { 0 };
1234 struct cmsghdr
*cmsg
;
1235 char cmsgbuf
[CMSG_SPACE(sizeof(*cred
))];
1240 if (msgrecv(sock
, buf
, 1) != 1) {
1241 fprintf(stderr
, "%s: Error getting reply from server over socketpair\n",
1243 return SEND_CREDS_FAIL
;
1247 msg
.msg_control
= cmsgbuf
;
1248 msg
.msg_controllen
= sizeof(cmsgbuf
);
1250 cmsg
= CMSG_FIRSTHDR(&msg
);
1251 cmsg
->cmsg_len
= CMSG_LEN(sizeof(struct ucred
));
1252 cmsg
->cmsg_level
= SOL_SOCKET
;
1253 cmsg
->cmsg_type
= SCM_CREDENTIALS
;
1254 memcpy(CMSG_DATA(cmsg
), cred
, sizeof(*cred
));
1256 msg
.msg_name
= NULL
;
1257 msg
.msg_namelen
= 0;
1261 iov
.iov_len
= sizeof(buf
);
1265 if (sendmsg(sock
, &msg
, 0) < 0) {
1266 fprintf(stderr
, "%s: failed at sendmsg: %s\n", __func__
,
1269 return SEND_CREDS_NOTSK
;
1270 return SEND_CREDS_FAIL
;
1273 return SEND_CREDS_OK
;
1276 static bool recv_creds(int sock
, struct ucred
*cred
, char *v
)
1278 struct msghdr msg
= { 0 };
1280 struct cmsghdr
*cmsg
;
1281 char cmsgbuf
[CMSG_SPACE(sizeof(*cred
))];
1292 if (setsockopt(sock
, SOL_SOCKET
, SO_PASSCRED
, &optval
, sizeof(optval
)) == -1) {
1293 fprintf(stderr
, "Failed to set passcred: %s\n", strerror(errno
));
1297 if (write(sock
, buf
, 1) != 1) {
1298 fprintf(stderr
, "Failed to start write on scm fd: %s\n", strerror(errno
));
1302 msg
.msg_name
= NULL
;
1303 msg
.msg_namelen
= 0;
1304 msg
.msg_control
= cmsgbuf
;
1305 msg
.msg_controllen
= sizeof(cmsgbuf
);
1308 iov
.iov_len
= sizeof(buf
);
1312 if (!wait_for_sock(sock
, 2)) {
1313 fprintf(stderr
, "Timed out waiting for scm_cred: %s\n",
1317 ret
= recvmsg(sock
, &msg
, MSG_DONTWAIT
);
1319 fprintf(stderr
, "Failed to receive scm_cred: %s\n",
1324 cmsg
= CMSG_FIRSTHDR(&msg
);
1326 if (cmsg
&& cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)) &&
1327 cmsg
->cmsg_level
== SOL_SOCKET
&&
1328 cmsg
->cmsg_type
== SCM_CREDENTIALS
) {
1329 memcpy(cred
, CMSG_DATA(cmsg
), sizeof(*cred
));
1338 * pid_to_ns - reads pids from a ucred over a socket, then writes the
1339 * int value back over the socket. This shifts the pid from the
1340 * sender's pidns into tpid's pidns.
1342 static void pid_to_ns(int sock
, pid_t tpid
)
1347 while (recv_creds(sock
, &cred
, &v
)) {
1350 if (write(sock
, &cred
.pid
, sizeof(pid_t
)) != sizeof(pid_t
))
1357 * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain
1358 * in your old pidns. Only children which you fork will be in the target
1359 * pidns. So the pid_to_ns_wrapper does the setns, then forks a child to
1360 * actually convert pids
1362 static void pid_to_ns_wrapper(int sock
, pid_t tpid
)
1364 int newnsfd
= -1, ret
, cpipe
[2];
1369 ret
= snprintf(fnam
, sizeof(fnam
), "/proc/%d/ns/pid", tpid
);
1370 if (ret
< 0 || ret
>= sizeof(fnam
))
1372 newnsfd
= open(fnam
, O_RDONLY
);
1375 if (setns(newnsfd
, 0) < 0)
1379 if (pipe(cpipe
) < 0)
1389 if (write(cpipe
[1], &b
, sizeof(char)) < 0) {
1390 fprintf(stderr
, "%s (child): erorr on write: %s\n",
1391 __func__
, strerror(errno
));
1394 pid_to_ns(sock
, tpid
);
1395 _exit(1); // not reached
1397 // give the child 1 second to be done forking and
1399 if (!wait_for_sock(cpipe
[0], 1))
1401 ret
= read(cpipe
[0], &v
, 1);
1402 if (ret
!= sizeof(char) || v
!= '1')
1405 if (!wait_for_pid(cpid
))
1411 * To read cgroup files with a particular pid, we will setns into the child
1412 * pidns, open a pipe, fork a child - which will be the first to really be in
1413 * the child ns - which does the cgfs_get_value and writes the data to the pipe.
1415 static bool do_read_pids(pid_t tpid
, const char *contrl
, const char *cg
, const char *file
, char **d
)
1417 int sock
[2] = {-1, -1};
1418 char *tmpdata
= NULL
;
1420 pid_t qpid
, cpid
= -1;
1421 bool answer
= false;
1424 size_t sz
= 0, asz
= 0;
1426 if (!cgfs_get_value(contrl
, cg
, file
, &tmpdata
))
1430 * Now we read the pids from returned data one by one, pass
1431 * them into a child in the target namespace, read back the
1432 * translated pids, and put them into our to-return data
1435 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sock
) < 0) {
1436 perror("socketpair");
1445 if (!cpid
) // child - exits when done
1446 pid_to_ns_wrapper(sock
[1], tpid
);
1448 char *ptr
= tmpdata
;
1451 while (sscanf(ptr
, "%d\n", &qpid
) == 1) {
1453 ret
= send_creds(sock
[0], &cred
, v
, true);
1455 if (ret
== SEND_CREDS_NOTSK
)
1457 if (ret
== SEND_CREDS_FAIL
)
1460 // read converted results
1461 if (!wait_for_sock(sock
[0], 2)) {
1462 fprintf(stderr
, "%s: timed out waiting for pid from child: %s\n",
1463 __func__
, strerror(errno
));
1466 if (read(sock
[0], &qpid
, sizeof(qpid
)) != sizeof(qpid
)) {
1467 fprintf(stderr
, "%s: error reading pid from child: %s\n",
1468 __func__
, strerror(errno
));
1471 must_strcat_pid(d
, &sz
, &asz
, qpid
);
1473 ptr
= strchr(ptr
, '\n');
1479 cred
.pid
= getpid();
1481 if (send_creds(sock
[0], &cred
, v
, true) != SEND_CREDS_OK
) {
1482 // failed to ask child to exit
1483 fprintf(stderr
, "%s: failed to ask child to exit: %s\n",
1484 __func__
, strerror(errno
));
1494 if (sock
[0] != -1) {
1501 static int cg_read(const char *path
, char *buf
, size_t size
, off_t offset
,
1502 struct fuse_file_info
*fi
)
1504 struct fuse_context
*fc
= fuse_get_context();
1505 struct file_info
*f
= (struct file_info
*)fi
->fh
;
1506 struct cgfs_files
*k
= NULL
;
1511 if (f
->type
!= LXC_TYPE_CGFILE
) {
1512 fprintf(stderr
, "Internal error: directory cache info used in cg_read\n");
1525 if ((k
= cgfs_get_key(f
->controller
, f
->cgroup
, f
->file
)) == NULL
) {
1531 if (!fc_may_access(fc
, f
->controller
, f
->cgroup
, f
->file
, O_RDONLY
)) { // should never get here
1536 if (strcmp(f
->file
, "tasks") == 0 ||
1537 strcmp(f
->file
, "/tasks") == 0 ||
1538 strcmp(f
->file
, "/cgroup.procs") == 0 ||
1539 strcmp(f
->file
, "cgroup.procs") == 0)
1540 // special case - we have to translate the pids
1541 r
= do_read_pids(fc
->pid
, f
->controller
, f
->cgroup
, f
->file
, &data
);
1543 r
= cgfs_get_value(f
->controller
, f
->cgroup
, f
->file
, &data
);
1557 memcpy(buf
, data
, s
);
1558 if (s
> 0 && s
< size
&& data
[s
-1] != '\n')
1568 static void pid_from_ns(int sock
, pid_t tpid
)
1578 if (!wait_for_sock(sock
, 2)) {
1579 fprintf(stderr
, "%s: timeout reading from parent\n", __func__
);
1582 if ((ret
= read(sock
, &vpid
, sizeof(pid_t
))) != sizeof(pid_t
)) {
1583 fprintf(stderr
, "%s: bad read from parent: %s\n",
1584 __func__
, strerror(errno
));
1587 if (vpid
== -1) // done
1591 if (send_creds(sock
, &cred
, v
, true) != SEND_CREDS_OK
) {
1593 cred
.pid
= getpid();
1594 if (send_creds(sock
, &cred
, v
, false) != SEND_CREDS_OK
)
1601 static void pid_from_ns_wrapper(int sock
, pid_t tpid
)
1603 int newnsfd
= -1, ret
, cpipe
[2];
1608 ret
= snprintf(fnam
, sizeof(fnam
), "/proc/%d/ns/pid", tpid
);
1609 if (ret
< 0 || ret
>= sizeof(fnam
))
1611 newnsfd
= open(fnam
, O_RDONLY
);
1614 if (setns(newnsfd
, 0) < 0)
1618 if (pipe(cpipe
) < 0)
1630 if (write(cpipe
[1], &b
, sizeof(char)) < 0) {
1631 fprintf(stderr
, "%s (child): erorr on write: %s\n",
1632 __func__
, strerror(errno
));
1635 pid_from_ns(sock
, tpid
);
1638 // give the child 1 second to be done forking and
1640 if (!wait_for_sock(cpipe
[0], 1))
1642 ret
= read(cpipe
[0], &v
, 1);
1643 if (ret
!= sizeof(char) || v
!= '1') {
1647 if (!wait_for_pid(cpid
))
1652 kill(cpid
, SIGKILL
);
1658 * Given host @uid, return the uid to which it maps in
1659 * @pid's user namespace, or -1 if none.
1661 bool hostuid_to_ns(uid_t uid
, pid_t pid
, uid_t
*answer
)
1666 sprintf(line
, "/proc/%d/uid_map", pid
);
1667 if ((f
= fopen(line
, "r")) == NULL
) {
1671 *answer
= convert_id_to_ns(f
, uid
);
1680 * get_pid_creds: get the real uid and gid of @pid from
1682 * (XXX should we use euid here?)
1684 void get_pid_creds(pid_t pid
, uid_t
*uid
, gid_t
*gid
)
1693 sprintf(line
, "/proc/%d/status", pid
);
1694 if ((f
= fopen(line
, "r")) == NULL
) {
1695 fprintf(stderr
, "Error opening %s: %s\n", line
, strerror(errno
));
1698 while (fgets(line
, 400, f
)) {
1699 if (strncmp(line
, "Uid:", 4) == 0) {
1700 if (sscanf(line
+4, "%u", &u
) != 1) {
1701 fprintf(stderr
, "bad uid line for pid %u\n", pid
);
1706 } else if (strncmp(line
, "Gid:", 4) == 0) {
1707 if (sscanf(line
+4, "%u", &g
) != 1) {
1708 fprintf(stderr
, "bad gid line for pid %u\n", pid
);
1719 * May the requestor @r move victim @v to a new cgroup?
1720 * This is allowed if
1721 * . they are the same task
1722 * . they are owned by the same uid
1723 * . @r is root on the host, or
1724 * . @v's uid is mapped into @r's where @r is root.
1726 bool may_move_pid(pid_t r
, uid_t r_uid
, pid_t v
)
1728 uid_t v_uid
, tmpuid
;
1735 get_pid_creds(v
, &v_uid
, &v_gid
);
1738 if (hostuid_to_ns(r_uid
, r
, &tmpuid
) && tmpuid
== 0
1739 && hostuid_to_ns(v_uid
, r
, &tmpuid
))
1744 static bool do_write_pids(pid_t tpid
, uid_t tuid
, const char *contrl
, const char *cg
,
1745 const char *file
, const char *buf
)
1747 int sock
[2] = {-1, -1};
1748 pid_t qpid
, cpid
= -1;
1749 FILE *pids_file
= NULL
;
1750 bool answer
= false, fail
= false;
1752 pids_file
= open_pids_file(contrl
, cg
);
1757 * write the pids to a socket, have helper in writer's pidns
1758 * call movepid for us
1760 if (socketpair(AF_UNIX
, SOCK_DGRAM
, 0, sock
) < 0) {
1761 perror("socketpair");
1769 if (!cpid
) { // child
1771 pid_from_ns_wrapper(sock
[1], tpid
);
1774 const char *ptr
= buf
;
1775 while (sscanf(ptr
, "%d", &qpid
) == 1) {
1779 if (write(sock
[0], &qpid
, sizeof(qpid
)) != sizeof(qpid
)) {
1780 fprintf(stderr
, "%s: error writing pid to child: %s\n",
1781 __func__
, strerror(errno
));
1785 if (recv_creds(sock
[0], &cred
, &v
)) {
1787 if (!may_move_pid(tpid
, tuid
, cred
.pid
)) {
1791 if (fprintf(pids_file
, "%d", (int) cred
.pid
) < 0)
1796 ptr
= strchr(ptr
, '\n');
1802 /* All good, write the value */
1804 if (write(sock
[0], &qpid
,sizeof(qpid
)) != sizeof(qpid
))
1805 fprintf(stderr
, "Warning: failed to ask child to exit\n");
1813 if (sock
[0] != -1) {
1818 if (fclose(pids_file
) != 0)
1824 int cg_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
1825 struct fuse_file_info
*fi
)
1827 struct fuse_context
*fc
= fuse_get_context();
1828 char *localbuf
= NULL
;
1829 struct cgfs_files
*k
= NULL
;
1830 struct file_info
*f
= (struct file_info
*)fi
->fh
;
1833 if (f
->type
!= LXC_TYPE_CGFILE
) {
1834 fprintf(stderr
, "Internal error: directory cache info used in cg_write\n");
1844 localbuf
= alloca(size
+1);
1845 localbuf
[size
] = '\0';
1846 memcpy(localbuf
, buf
, size
);
1848 if ((k
= cgfs_get_key(f
->controller
, f
->cgroup
, f
->file
)) == NULL
) {
1853 if (!fc_may_access(fc
, f
->controller
, f
->cgroup
, f
->file
, O_WRONLY
)) {
1858 if (strcmp(f
->file
, "tasks") == 0 ||
1859 strcmp(f
->file
, "/tasks") == 0 ||
1860 strcmp(f
->file
, "/cgroup.procs") == 0 ||
1861 strcmp(f
->file
, "cgroup.procs") == 0)
1862 // special case - we have to translate the pids
1863 r
= do_write_pids(fc
->pid
, fc
->uid
, f
->controller
, f
->cgroup
, f
->file
, localbuf
);
1865 r
= cgfs_set_value(f
->controller
, f
->cgroup
, f
->file
, localbuf
);
1875 int cg_chown(const char *path
, uid_t uid
, gid_t gid
)
1877 struct fuse_context
*fc
= fuse_get_context();
1878 char *cgdir
= NULL
, *last
= NULL
, *path1
, *path2
, *controller
;
1879 struct cgfs_files
*k
= NULL
;
1886 if (strcmp(path
, "/cgroup") == 0)
1889 controller
= pick_controller_from_path(fc
, path
);
1892 cgroup
= find_cgroup_in_path(path
);
1894 /* this is just /cgroup/controller */
1897 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
1907 if (is_child_cgroup(controller
, path1
, path2
)) {
1908 // get uid, gid, from '/tasks' file and make up a mode
1909 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1910 k
= cgfs_get_key(controller
, cgroup
, "tasks");
1913 k
= cgfs_get_key(controller
, path1
, path2
);
1921 * This being a fuse request, the uid and gid must be valid
1922 * in the caller's namespace. So we can just check to make
1923 * sure that the caller is root in his uid, and privileged
1924 * over the file's current owner.
1926 if (!is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_REQD
)) {
1931 ret
= cgfs_chown_file(controller
, cgroup
, uid
, gid
);
1940 int cg_chmod(const char *path
, mode_t mode
)
1942 struct fuse_context
*fc
= fuse_get_context();
1943 char * cgdir
= NULL
, *last
= NULL
, *path1
, *path2
, *controller
;
1944 struct cgfs_files
*k
= NULL
;
1951 if (strcmp(path
, "/cgroup") == 0)
1954 controller
= pick_controller_from_path(fc
, path
);
1957 cgroup
= find_cgroup_in_path(path
);
1959 /* this is just /cgroup/controller */
1962 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
1972 if (is_child_cgroup(controller
, path1
, path2
)) {
1973 // get uid, gid, from '/tasks' file and make up a mode
1974 // That is a hack, until cgmanager gains a GetCgroupPerms fn.
1975 k
= cgfs_get_key(controller
, cgroup
, "tasks");
1978 k
= cgfs_get_key(controller
, path1
, path2
);
1986 * This being a fuse request, the uid and gid must be valid
1987 * in the caller's namespace. So we can just check to make
1988 * sure that the caller is root in his uid, and privileged
1989 * over the file's current owner.
1991 if (!is_privileged_over(fc
->pid
, fc
->uid
, k
->uid
, NS_ROOT_OPT
)) {
1996 if (!cgfs_chmod_file(controller
, cgroup
, mode
)) {
2008 int cg_mkdir(const char *path
, mode_t mode
)
2010 struct fuse_context
*fc
= fuse_get_context();
2011 char *last
= NULL
, *path1
, *cgdir
= NULL
, *controller
, *next
= NULL
;
2019 controller
= pick_controller_from_path(fc
, path
);
2023 cgroup
= find_cgroup_in_path(path
);
2027 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
2033 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2036 if (!caller_is_in_ancestor(initpid
, controller
, path1
, &next
)) {
2039 else if (last
&& strcmp(next
, last
) == 0)
2046 if (!fc_may_access(fc
, controller
, path1
, NULL
, O_RDWR
)) {
2050 if (!caller_is_in_ancestor(initpid
, controller
, path1
, NULL
)) {
2055 ret
= cgfs_create(controller
, cgroup
, fc
->uid
, fc
->gid
);
2063 static int cg_rmdir(const char *path
)
2065 struct fuse_context
*fc
= fuse_get_context();
2066 char *last
= NULL
, *cgdir
= NULL
, *controller
, *next
= NULL
;
2073 controller
= pick_controller_from_path(fc
, path
);
2077 cgroup
= find_cgroup_in_path(path
);
2081 get_cgdir_and_path(cgroup
, &cgdir
, &last
);
2087 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2090 if (!caller_is_in_ancestor(initpid
, controller
, cgroup
, &next
)) {
2091 if (!last
|| strcmp(next
, last
) == 0)
2098 if (!fc_may_access(fc
, controller
, cgdir
, NULL
, O_WRONLY
)) {
2102 if (!caller_is_in_ancestor(initpid
, controller
, cgroup
, NULL
)) {
2107 if (!cgfs_remove(controller
, cgroup
)) {
/*
 * Report whether the string @line begins with the prefix @pref.
 * An empty @pref matches every string.
 */
static bool startswith(const char *line, const char *pref)
{
	size_t preflen = strlen(pref);

	return strncmp(line, pref, preflen) == 0;
}
/*
 * Walk the newline-separated text in @memstat looking for the first line
 * that starts with "total_cache".  The number following that key is
 * parsed, divided by 1024 and stored in @v.  @v is 0 when no such line
 * exists.
 */
static void get_mem_cached(char *memstat, unsigned long *v)
{
	char *cur = memstat;

	*v = 0;
	for (;;) {
		char *nl;

		if (*cur == '\0')
			return;

		if (startswith(cur, "total_cache")) {
			/* 11 == strlen("total_cache") */
			sscanf(cur + 11, "%lu", v);
			*v /= 1024;
			return;
		}

		/* advance to the start of the next line, if there is one */
		nl = strchr(cur, '\n');
		if (nl == NULL)
			return;
		cur = nl + 1;
	}
}
/*
 * Look up the line keyed "<major>:<minor> <iotype>" in the
 * newline-separated text @str and store the number that follows the key
 * in @v.  @v is 0 when no line carries that key.
 */
static void get_blkio_io_value(char *str, unsigned major, unsigned minor, char *iotype, unsigned long *v)
{
	char key[32];
	size_t keylen;
	char *cur;

	memset(key, 0, 32);
	snprintf(key, 32, "%u:%u %s", major, minor, iotype);
	keylen = strlen(key);

	*v = 0;

	for (cur = str; *cur != '\0'; ) {
		char *nl;

		if (startswith(cur, key)) {
			sscanf(cur + keylen, "%lu", v);
			return;
		}

		/* no match on this line: move on to the next one */
		nl = strchr(cur, '\n');
		if (nl == NULL)
			return;
		cur = nl + 1;
	}
}
2168 static int read_file(const char *path
, char *buf
, size_t size
,
2169 struct file_info
*d
)
2171 size_t linelen
= 0, total_len
= 0, rv
= 0;
2173 char *cache
= d
->buf
;
2174 size_t cache_size
= d
->buflen
;
2175 FILE *f
= fopen(path
, "r");
2179 while (getline(&line
, &linelen
, f
) != -1) {
2180 size_t l
= snprintf(cache
, cache_size
, "%s", line
);
2182 perror("Error writing to cache");
2186 if (l
>= cache_size
) {
2187 fprintf(stderr
, "Internal error: truncated write to cache\n");
2191 if (l
< cache_size
) {
2196 cache
+= cache_size
;
2197 total_len
+= cache_size
;
2203 d
->size
= total_len
;
2204 if (total_len
> size
) total_len
= size
;
2206 /* read from off 0 */
2207 memcpy(buf
, d
->buf
, total_len
);
2216 * FUSE ops for /proc
/*
 * Read memory.limit_in_bytes for @cgroup from the memory controller and
 * return it as a number.  Returns (unsigned long)-1 when the value
 * cannot be read.
 */
static unsigned long get_memlimit(const char *cgroup)
{
	char *limit_str = NULL;
	unsigned long limit;

	if (!cgfs_get_value("memory", cgroup, "memory.limit_in_bytes", &limit_str))
		limit = -1;
	else
		limit = strtoul(limit_str, NULL, 10);

	free(limit_str);

	return limit;
}
/*
 * Return the smallest memory limit that applies to @cgroup: its own
 * limit or that of any ancestor reached by repeatedly taking dirname()
 * of the path.  Ancestors whose limit cannot be read (get_memlimit()
 * returned (unsigned long)-1) are ignored.
 */
static unsigned long get_min_memlimit(const char *cgroup)
{
	/* dirname() may modify its argument, so work on a stack copy */
	char *copy = strdupa(cgroup);
	unsigned long memlimit, retlimit;

	retlimit = get_memlimit(copy);

	/*
	 * Walk up to the root.  dirname() turns a path without any '/'
	 * (and "." itself) into ".", so stop there as well; otherwise a
	 * path that does not start with '/' would never terminate the loop.
	 */
	while (strcmp(copy, "/") != 0 && strcmp(copy, ".") != 0) {
		copy = dirname(copy);
		memlimit = get_memlimit(copy);
		if (memlimit != (unsigned long)-1 && memlimit < retlimit)
			retlimit = memlimit;
	}

	return retlimit;
}
2249 static int proc_meminfo_read(char *buf
, size_t size
, off_t offset
,
2250 struct fuse_file_info
*fi
)
2252 struct fuse_context
*fc
= fuse_get_context();
2253 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2255 char *memusage_str
= NULL
, *memstat_str
= NULL
,
2256 *memswlimit_str
= NULL
, *memswusage_str
= NULL
,
2257 *memswlimit_default_str
= NULL
, *memswusage_default_str
= NULL
;
2258 unsigned long memlimit
= 0, memusage
= 0, memswlimit
= 0, memswusage
= 0,
2259 cached
= 0, hosttotal
= 0;
2261 size_t linelen
= 0, total_len
= 0, rv
= 0;
2262 char *cache
= d
->buf
;
2263 size_t cache_size
= d
->buflen
;
2267 if (offset
> d
->size
)
2271 int left
= d
->size
- offset
;
2272 total_len
= left
> size
? size
: left
;
2273 memcpy(buf
, cache
+ offset
, total_len
);
2277 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2280 cg
= get_pid_cgroup(initpid
, "memory");
2282 return read_file("/proc/meminfo", buf
, size
, d
);
2284 memlimit
= get_min_memlimit(cg
);
2285 if (!cgfs_get_value("memory", cg
, "memory.usage_in_bytes", &memusage_str
))
2287 if (!cgfs_get_value("memory", cg
, "memory.stat", &memstat_str
))
2290 // Following values are allowed to fail, because swapaccount might be turned
2291 // off for current kernel
2292 if(cgfs_get_value("memory", cg
, "memory.memsw.limit_in_bytes", &memswlimit_str
) &&
2293 cgfs_get_value("memory", cg
, "memory.memsw.usage_in_bytes", &memswusage_str
))
2295 /* If swapaccounting is turned on, then default value is assumed to be that of cgroup / */
2296 if (!cgfs_get_value("memory", "/", "memory.memsw.limit_in_bytes", &memswlimit_default_str
))
2298 if (!cgfs_get_value("memory", "/", "memory.memsw.usage_in_bytes", &memswusage_default_str
))
2301 memswlimit
= strtoul(memswlimit_str
, NULL
, 10);
2302 memswusage
= strtoul(memswusage_str
, NULL
, 10);
2304 if (!strcmp(memswlimit_str
, memswlimit_default_str
))
2306 if (!strcmp(memswusage_str
, memswusage_default_str
))
2309 memswlimit
= memswlimit
/ 1024;
2310 memswusage
= memswusage
/ 1024;
2313 memusage
= strtoul(memusage_str
, NULL
, 10);
2317 get_mem_cached(memstat_str
, &cached
);
2319 f
= fopen("/proc/meminfo", "r");
2323 while (getline(&line
, &linelen
, f
) != -1) {
2325 char *printme
, lbuf
[100];
2327 memset(lbuf
, 0, 100);
2328 if (startswith(line
, "MemTotal:")) {
2329 sscanf(line
+14, "%lu", &hosttotal
);
2330 if (hosttotal
< memlimit
)
2331 memlimit
= hosttotal
;
2332 snprintf(lbuf
, 100, "MemTotal: %8lu kB\n", memlimit
);
2334 } else if (startswith(line
, "MemFree:")) {
2335 snprintf(lbuf
, 100, "MemFree: %8lu kB\n", memlimit
- memusage
);
2337 } else if (startswith(line
, "MemAvailable:")) {
2338 snprintf(lbuf
, 100, "MemAvailable: %8lu kB\n", memlimit
- memusage
);
2340 } else if (startswith(line
, "SwapTotal:") && memswlimit
> 0) {
2341 snprintf(lbuf
, 100, "SwapTotal: %8lu kB\n", memswlimit
- memlimit
);
2343 } else if (startswith(line
, "SwapFree:") && memswlimit
> 0 && memswusage
> 0) {
2344 snprintf(lbuf
, 100, "SwapFree: %8lu kB\n",
2345 (memswlimit
- memlimit
) - (memswusage
- memusage
));
2347 } else if (startswith(line
, "Buffers:")) {
2348 snprintf(lbuf
, 100, "Buffers: %8lu kB\n", 0UL);
2350 } else if (startswith(line
, "Cached:")) {
2351 snprintf(lbuf
, 100, "Cached: %8lu kB\n", cached
);
2353 } else if (startswith(line
, "SwapCached:")) {
2354 snprintf(lbuf
, 100, "SwapCached: %8lu kB\n", 0UL);
2359 l
= snprintf(cache
, cache_size
, "%s", printme
);
2361 perror("Error writing to cache");
2366 if (l
>= cache_size
) {
2367 fprintf(stderr
, "Internal error: truncated write to cache\n");
2378 d
->size
= total_len
;
2379 if (total_len
> size
) total_len
= size
;
2380 memcpy(buf
, d
->buf
, total_len
);
2389 free(memswlimit_str
);
2390 free(memswusage_str
);
2392 free(memswlimit_default_str
);
2393 free(memswusage_default_str
);
2398 * Read the cpuset.cpus for cg
2399 * Return the answer in a newly allocated string which must be freed
/*
 * Fetch cpuset.cpus for the cgroup @cg.  Returns the string handed back
 * by cgfs_get_value(), or NULL when that call reports failure.
 */
static char *get_cpuset(const char *cg)
{
	char *answer;

	return cgfs_get_value("cpuset", cg, "cpuset.cpus", &answer) ? answer : NULL;
}
2410 bool cpu_in_cpuset(int cpu
, const char *cpuset
);
/*
 * Parse the cpu number out of a "processor : N" line and report whether
 * that cpu is a member of @cpuset.  Lines that do not parse yield false.
 */
static bool cpuline_in_cpuset(const char *line, const char *cpuset)
{
	int cpu;

	if (sscanf(line, "processor : %d", &cpu) == 1)
		return cpu_in_cpuset(cpu, cpuset);

	return false;
}
2422 * check whether this is a "^processor" line in /proc/cpuinfo
/*
 * Report whether @line has the form "processor : N", i.e. whether it
 * opens the description of one cpu.
 */
static bool is_processor_line(const char *line)
{
	int cpu;

	return sscanf(line, "processor : %d", &cpu) == 1;
}
2433 static int proc_cpuinfo_read(char *buf
, size_t size
, off_t offset
,
2434 struct fuse_file_info
*fi
)
2436 struct fuse_context
*fc
= fuse_get_context();
2437 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2439 char *cpuset
= NULL
;
2441 size_t linelen
= 0, total_len
= 0, rv
= 0;
2442 bool am_printing
= false;
2444 char *cache
= d
->buf
;
2445 size_t cache_size
= d
->buflen
;
2449 if (offset
> d
->size
)
2453 int left
= d
->size
- offset
;
2454 total_len
= left
> size
? size
: left
;
2455 memcpy(buf
, cache
+ offset
, total_len
);
2459 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2462 cg
= get_pid_cgroup(initpid
, "cpuset");
2464 return read_file("proc/cpuinfo", buf
, size
, d
);
2466 cpuset
= get_cpuset(cg
);
2470 f
= fopen("/proc/cpuinfo", "r");
2474 while (getline(&line
, &linelen
, f
) != -1) {
2476 if (is_processor_line(line
)) {
2477 am_printing
= cpuline_in_cpuset(line
, cpuset
);
2480 l
= snprintf(cache
, cache_size
, "processor : %d\n", curcpu
);
2482 perror("Error writing to cache");
2486 if (l
>= cache_size
) {
2487 fprintf(stderr
, "Internal error: truncated write to cache\n");
2491 if (l
< cache_size
){
2496 cache
+= cache_size
;
2497 total_len
+= cache_size
;
2505 l
= snprintf(cache
, cache_size
, "%s", line
);
2507 perror("Error writing to cache");
2511 if (l
>= cache_size
) {
2512 fprintf(stderr
, "Internal error: truncated write to cache\n");
2516 if (l
< cache_size
) {
2521 cache
+= cache_size
;
2522 total_len
+= cache_size
;
2530 d
->size
= total_len
;
2531 if (total_len
> size
) total_len
= size
;
2533 /* read from off 0 */
2534 memcpy(buf
, d
->buf
, total_len
);
2545 static int proc_stat_read(char *buf
, size_t size
, off_t offset
,
2546 struct fuse_file_info
*fi
)
2548 struct fuse_context
*fc
= fuse_get_context();
2549 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2551 char *cpuset
= NULL
;
2553 size_t linelen
= 0, total_len
= 0, rv
= 0;
2554 int curcpu
= -1; /* cpu numbering starts at 0 */
2555 unsigned long user
= 0, nice
= 0, system
= 0, idle
= 0, iowait
= 0, irq
= 0, softirq
= 0, steal
= 0, guest
= 0;
2556 unsigned long user_sum
= 0, nice_sum
= 0, system_sum
= 0, idle_sum
= 0, iowait_sum
= 0,
2557 irq_sum
= 0, softirq_sum
= 0, steal_sum
= 0, guest_sum
= 0;
2558 #define CPUALL_MAX_SIZE BUF_RESERVE_SIZE
2559 char cpuall
[CPUALL_MAX_SIZE
];
2560 /* reserve for cpu all */
2561 char *cache
= d
->buf
+ CPUALL_MAX_SIZE
;
2562 size_t cache_size
= d
->buflen
- CPUALL_MAX_SIZE
;
2566 if (offset
> d
->size
)
2570 int left
= d
->size
- offset
;
2571 total_len
= left
> size
? size
: left
;
2572 memcpy(buf
, d
->buf
+ offset
, total_len
);
2576 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2579 cg
= get_pid_cgroup(initpid
, "cpuset");
2581 return read_file("/proc/stat", buf
, size
, d
);
2583 cpuset
= get_cpuset(cg
);
2587 f
= fopen("/proc/stat", "r");
2592 if (getline(&line
, &linelen
, f
) < 0) {
2593 fprintf(stderr
, "proc_stat_read read first line failed\n");
2597 while (getline(&line
, &linelen
, f
) != -1) {
2600 char cpu_char
[10]; /* That's a lot of cores */
2603 if (sscanf(line
, "cpu%9[^ ]", cpu_char
) != 1) {
2604 /* not a ^cpuN line containing a number N, just print it */
2605 l
= snprintf(cache
, cache_size
, "%s", line
);
2607 perror("Error writing to cache");
2611 if (l
>= cache_size
) {
2612 fprintf(stderr
, "Internal error: truncated write to cache\n");
2616 if (l
< cache_size
) {
2622 //no more space, break it
2623 cache
+= cache_size
;
2624 total_len
+= cache_size
;
2630 if (sscanf(cpu_char
, "%d", &cpu
) != 1)
2632 if (!cpu_in_cpuset(cpu
, cpuset
))
2636 c
= strchr(line
, ' ');
2639 l
= snprintf(cache
, cache_size
, "cpu%d%s", curcpu
, c
);
2641 perror("Error writing to cache");
2646 if (l
>= cache_size
) {
2647 fprintf(stderr
, "Internal error: truncated write to cache\n");
2656 if (sscanf(line
, "%*s %lu %lu %lu %lu %lu %lu %lu %lu %lu", &user
, &nice
, &system
, &idle
, &iowait
, &irq
,
2657 &softirq
, &steal
, &guest
) != 9)
2661 system_sum
+= system
;
2663 iowait_sum
+= iowait
;
2665 softirq_sum
+= softirq
;
2672 int cpuall_len
= snprintf(cpuall
, CPUALL_MAX_SIZE
, "%s %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
2673 "cpu ", user_sum
, nice_sum
, system_sum
, idle_sum
, iowait_sum
, irq_sum
, softirq_sum
, steal_sum
, guest_sum
);
2674 if (cpuall_len
> 0 && cpuall_len
< CPUALL_MAX_SIZE
){
2675 memcpy(cache
, cpuall
, cpuall_len
);
2676 cache
+= cpuall_len
;
2678 /* shouldn't happen */
2679 fprintf(stderr
, "proc_stat_read copy cpuall failed, cpuall_len=%d\n", cpuall_len
);
2683 memmove(cache
, d
->buf
+ CPUALL_MAX_SIZE
, total_len
);
2684 total_len
+= cpuall_len
;
2686 d
->size
= total_len
;
2687 if (total_len
> size
) total_len
= size
;
2689 memcpy(buf
, d
->buf
, total_len
);
/*
 * Return the number of seconds since the ctime of the /proc/<initpid>
 * directory, where <initpid> is what lookup_initpid_in_store() reports
 * for @pid.  Returns 0 when the init pid cannot be found, the path does
 * not fit, or lstat() fails.
 */
static long int getreaperage(pid_t pid)
{
	char procdir[100];
	struct stat st;
	pid_t initpid = lookup_initpid_in_store(pid);
	int n;

	if (initpid <= 0)
		return 0;

	n = snprintf(procdir, 100, "/proc/%d", initpid);
	if (n < 0 || n >= 100)
		return 0;

	if (lstat(procdir, &st) < 0)
		return 0;

	return time(NULL) - st.st_ctime;
}
/*
 * Return cpuacct.usage of the cpuacct cgroup of @task's init process,
 * divided by 1000000000.  Returns 0 when the init pid, its cgroup or
 * the usage value cannot be obtained.
 */
static unsigned long get_reaper_busy(pid_t task)
{
	pid_t initpid = lookup_initpid_in_store(task);
	char *cgroup = NULL, *usage_str = NULL;
	unsigned long usage = 0;

	if (initpid <= 0)
		return 0;

	cgroup = get_pid_cgroup(initpid, "cpuacct");
	if (cgroup && cgfs_get_value("cpuacct", cgroup, "cpuacct.usage", &usage_str)) {
		usage = strtoul(usage_str, NULL, 10);
		usage /= 1000000000;
	}

	free(cgroup);
	free(usage_str);
	return usage;
}
2746 * We read /proc/uptime and reuse its second field.
2747 * For the first field, we use the mtime for the reaper for
2748 * the calling pid as returned by getreaperage
2750 static int proc_uptime_read(char *buf
, size_t size
, off_t offset
,
2751 struct fuse_file_info
*fi
)
2753 struct fuse_context
*fc
= fuse_get_context();
2754 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2755 long int reaperage
= getreaperage(fc
->pid
);
2756 unsigned long int busytime
= get_reaper_busy(fc
->pid
), idletime
;
2757 char *cache
= d
->buf
;
2758 size_t total_len
= 0;
2761 if (offset
> d
->size
)
2765 int left
= d
->size
- offset
;
2766 total_len
= left
> size
? size
: left
;
2767 memcpy(buf
, cache
+ offset
, total_len
);
2771 idletime
= reaperage
- busytime
;
2772 if (idletime
> reaperage
)
2773 idletime
= reaperage
;
2775 total_len
= snprintf(d
->buf
, d
->size
, "%ld.0 %lu.0\n", reaperage
, idletime
);
2777 perror("Error writing to cache");
2781 d
->size
= (int)total_len
;
2784 if (total_len
> size
) total_len
= size
;
2786 memcpy(buf
, d
->buf
, total_len
);
2790 static int proc_diskstats_read(char *buf
, size_t size
, off_t offset
,
2791 struct fuse_file_info
*fi
)
2794 struct fuse_context
*fc
= fuse_get_context();
2795 struct file_info
*d
= (struct file_info
*)fi
->fh
;
2797 char *io_serviced_str
= NULL
, *io_merged_str
= NULL
, *io_service_bytes_str
= NULL
,
2798 *io_wait_time_str
= NULL
, *io_service_time_str
= NULL
;
2799 unsigned long read
= 0, write
= 0;
2800 unsigned long read_merged
= 0, write_merged
= 0;
2801 unsigned long read_sectors
= 0, write_sectors
= 0;
2802 unsigned long read_ticks
= 0, write_ticks
= 0;
2803 unsigned long ios_pgr
= 0, tot_ticks
= 0, rq_ticks
= 0;
2804 unsigned long rd_svctm
= 0, wr_svctm
= 0, rd_wait
= 0, wr_wait
= 0;
2805 char *cache
= d
->buf
;
2806 size_t cache_size
= d
->buflen
;
2808 size_t linelen
= 0, total_len
= 0, rv
= 0;
2809 unsigned int major
= 0, minor
= 0;
2814 if (offset
> d
->size
)
2818 int left
= d
->size
- offset
;
2819 total_len
= left
> size
? size
: left
;
2820 memcpy(buf
, cache
+ offset
, total_len
);
2824 pid_t initpid
= lookup_initpid_in_store(fc
->pid
);
2827 cg
= get_pid_cgroup(initpid
, "blkio");
2829 return read_file("/proc/diskstats", buf
, size
, d
);
2831 if (!cgfs_get_value("blkio", cg
, "blkio.io_serviced", &io_serviced_str
))
2833 if (!cgfs_get_value("blkio", cg
, "blkio.io_merged", &io_merged_str
))
2835 if (!cgfs_get_value("blkio", cg
, "blkio.io_service_bytes", &io_service_bytes_str
))
2837 if (!cgfs_get_value("blkio", cg
, "blkio.io_wait_time", &io_wait_time_str
))
2839 if (!cgfs_get_value("blkio", cg
, "blkio.io_service_time", &io_service_time_str
))
2843 f
= fopen("/proc/diskstats", "r");
2847 while (getline(&line
, &linelen
, f
) != -1) {
2849 char *printme
, lbuf
[256];
2851 i
= sscanf(line
, "%u %u %71s", &major
, &minor
, dev_name
);
2853 get_blkio_io_value(io_serviced_str
, major
, minor
, "Read", &read
);
2854 get_blkio_io_value(io_serviced_str
, major
, minor
, "Write", &write
);
2855 get_blkio_io_value(io_merged_str
, major
, minor
, "Read", &read_merged
);
2856 get_blkio_io_value(io_merged_str
, major
, minor
, "Write", &write_merged
);
2857 get_blkio_io_value(io_service_bytes_str
, major
, minor
, "Read", &read_sectors
);
2858 read_sectors
= read_sectors
/512;
2859 get_blkio_io_value(io_service_bytes_str
, major
, minor
, "Write", &write_sectors
);
2860 write_sectors
= write_sectors
/512;
2862 get_blkio_io_value(io_service_time_str
, major
, minor
, "Read", &rd_svctm
);
2863 rd_svctm
= rd_svctm
/1000000;
2864 get_blkio_io_value(io_wait_time_str
, major
, minor
, "Read", &rd_wait
);
2865 rd_wait
= rd_wait
/1000000;
2866 read_ticks
= rd_svctm
+ rd_wait
;
2868 get_blkio_io_value(io_service_time_str
, major
, minor
, "Write", &wr_svctm
);
2869 wr_svctm
= wr_svctm
/1000000;
2870 get_blkio_io_value(io_wait_time_str
, major
, minor
, "Write", &wr_wait
);
2871 wr_wait
= wr_wait
/1000000;
2872 write_ticks
= wr_svctm
+ wr_wait
;
2874 get_blkio_io_value(io_service_time_str
, major
, minor
, "Total", &tot_ticks
);
2875 tot_ticks
= tot_ticks
/1000000;
2880 memset(lbuf
, 0, 256);
2881 if (read
|| write
|| read_merged
|| write_merged
|| read_sectors
|| write_sectors
|| read_ticks
|| write_ticks
) {
2882 snprintf(lbuf
, 256, "%u %u %s %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
2883 major
, minor
, dev_name
, read
, read_merged
, read_sectors
, read_ticks
,
2884 write
, write_merged
, write_sectors
, write_ticks
, ios_pgr
, tot_ticks
, rq_ticks
);
2889 l
= snprintf(cache
, cache_size
, "%s", printme
);
2891 perror("Error writing to fuse buf");
2895 if (l
>= cache_size
) {
2896 fprintf(stderr
, "Internal error: truncated write to cache\n");
2906 d
->size
= total_len
;
2907 if (total_len
> size
) total_len
= size
;
2908 memcpy(buf
, d
->buf
, total_len
);
2916 free(io_serviced_str
);
2917 free(io_merged_str
);
2918 free(io_service_bytes_str
);
2919 free(io_wait_time_str
);
2920 free(io_service_time_str
);
/*
 * Return the number of bytes obtained by reading the file @which line
 * by line until getline() fails, or 0 if the file cannot be opened.
 */
static off_t get_procfile_size(const char *which)
{
	FILE *f = fopen(which, "r");
	char *line = NULL;
	size_t cap = 0;
	ssize_t n, total = 0;

	if (f == NULL)
		return 0;

	for (;;) {
		n = getline(&line, &cap, f);
		if (n == -1)
			break;
		total += n;
	}

	fclose(f);
	free(line);

	return total;
}
2941 static int proc_getattr(const char *path
, struct stat
*sb
)
2943 struct timespec now
;
2945 memset(sb
, 0, sizeof(struct stat
));
2946 if (clock_gettime(CLOCK_REALTIME
, &now
) < 0)
2948 sb
->st_uid
= sb
->st_gid
= 0;
2949 sb
->st_atim
= sb
->st_mtim
= sb
->st_ctim
= now
;
2950 if (strcmp(path
, "/proc") == 0) {
2951 sb
->st_mode
= S_IFDIR
| 00555;
2955 if (strcmp(path
, "/proc/meminfo") == 0 ||
2956 strcmp(path
, "/proc/cpuinfo") == 0 ||
2957 strcmp(path
, "/proc/uptime") == 0 ||
2958 strcmp(path
, "/proc/stat") == 0 ||
2959 strcmp(path
, "/proc/diskstats") == 0) {
2961 sb
->st_mode
= S_IFREG
| 00444;
2969 static int proc_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
2970 struct fuse_file_info
*fi
)
2972 if (filler(buf
, "cpuinfo", NULL
, 0) != 0 ||
2973 filler(buf
, "meminfo", NULL
, 0) != 0 ||
2974 filler(buf
, "stat", NULL
, 0) != 0 ||
2975 filler(buf
, "uptime", NULL
, 0) != 0 ||
2976 filler(buf
, "diskstats", NULL
, 0) != 0)
2981 static int proc_open(const char *path
, struct fuse_file_info
*fi
)
2984 struct file_info
*info
;
2986 if (strcmp(path
, "/proc/meminfo") == 0)
2987 type
= LXC_TYPE_PROC_MEMINFO
;
2988 else if (strcmp(path
, "/proc/cpuinfo") == 0)
2989 type
= LXC_TYPE_PROC_CPUINFO
;
2990 else if (strcmp(path
, "/proc/uptime") == 0)
2991 type
= LXC_TYPE_PROC_UPTIME
;
2992 else if (strcmp(path
, "/proc/stat") == 0)
2993 type
= LXC_TYPE_PROC_STAT
;
2994 else if (strcmp(path
, "/proc/diskstats") == 0)
2995 type
= LXC_TYPE_PROC_DISKSTATS
;
2999 info
= malloc(sizeof(*info
));
3003 memset(info
, 0, sizeof(*info
));
3006 info
->buflen
= get_procfile_size(path
) + BUF_RESERVE_SIZE
;
3008 info
->buf
= malloc(info
->buflen
);
3009 } while (!info
->buf
);
3010 memset(info
->buf
, 0, info
->buflen
);
3011 /* set actual size to buffer size */
3012 info
->size
= info
->buflen
;
3014 fi
->fh
= (unsigned long)info
;
3018 static int proc_release(const char *path
, struct fuse_file_info
*fi
)
3020 struct file_info
*f
= (struct file_info
*)fi
->fh
;
3022 do_release_file_info(f
);
3026 static int proc_read(const char *path
, char *buf
, size_t size
, off_t offset
,
3027 struct fuse_file_info
*fi
)
3029 struct file_info
*f
= (struct file_info
*) fi
->fh
;
3032 case LXC_TYPE_PROC_MEMINFO
:
3033 return proc_meminfo_read(buf
, size
, offset
, fi
);
3034 case LXC_TYPE_PROC_CPUINFO
:
3035 return proc_cpuinfo_read(buf
, size
, offset
, fi
);
3036 case LXC_TYPE_PROC_UPTIME
:
3037 return proc_uptime_read(buf
, size
, offset
, fi
);
3038 case LXC_TYPE_PROC_STAT
:
3039 return proc_stat_read(buf
, size
, offset
, fi
);
3040 case LXC_TYPE_PROC_DISKSTATS
:
3041 return proc_diskstats_read(buf
, size
, offset
, fi
);
3049 * these just delegate to the /proc and /cgroup ops as
3053 static int lxcfs_getattr(const char *path
, struct stat
*sb
)
3055 if (strcmp(path
, "/") == 0) {
3056 sb
->st_mode
= S_IFDIR
| 00755;
3060 if (strncmp(path
, "/cgroup", 7) == 0) {
3061 return cg_getattr(path
, sb
);
3063 if (strncmp(path
, "/proc", 5) == 0) {
3064 return proc_getattr(path
, sb
);
3069 static int lxcfs_opendir(const char *path
, struct fuse_file_info
*fi
)
3071 if (strcmp(path
, "/") == 0)
3074 if (strncmp(path
, "/cgroup", 7) == 0) {
3075 return cg_opendir(path
, fi
);
3077 if (strcmp(path
, "/proc") == 0)
3082 static int lxcfs_readdir(const char *path
, void *buf
, fuse_fill_dir_t filler
, off_t offset
,
3083 struct fuse_file_info
*fi
)
3085 if (strcmp(path
, "/") == 0) {
3086 if (filler(buf
, "proc", NULL
, 0) != 0 ||
3087 filler(buf
, "cgroup", NULL
, 0) != 0)
3091 if (strncmp(path
, "/cgroup", 7) == 0)
3092 return cg_readdir(path
, buf
, filler
, offset
, fi
);
3093 if (strcmp(path
, "/proc") == 0)
3094 return proc_readdir(path
, buf
, filler
, offset
, fi
);
3098 static int lxcfs_releasedir(const char *path
, struct fuse_file_info
*fi
)
3100 if (strcmp(path
, "/") == 0)
3102 if (strncmp(path
, "/cgroup", 7) == 0) {
3103 return cg_releasedir(path
, fi
);
3105 if (strcmp(path
, "/proc") == 0)
3110 static int lxcfs_open(const char *path
, struct fuse_file_info
*fi
)
3112 if (strncmp(path
, "/cgroup", 7) == 0)
3113 return cg_open(path
, fi
);
3114 if (strncmp(path
, "/proc", 5) == 0)
3115 return proc_open(path
, fi
);
3120 static int lxcfs_read(const char *path
, char *buf
, size_t size
, off_t offset
,
3121 struct fuse_file_info
*fi
)
3123 if (strncmp(path
, "/cgroup", 7) == 0)
3124 return cg_read(path
, buf
, size
, offset
, fi
);
3125 if (strncmp(path
, "/proc", 5) == 0)
3126 return proc_read(path
, buf
, size
, offset
, fi
);
3131 int lxcfs_write(const char *path
, const char *buf
, size_t size
, off_t offset
,
3132 struct fuse_file_info
*fi
)
3134 if (strncmp(path
, "/cgroup", 7) == 0) {
3135 return cg_write(path
, buf
, size
, offset
, fi
);
3141 static int lxcfs_flush(const char *path
, struct fuse_file_info
*fi
)
3146 static int lxcfs_release(const char *path
, struct fuse_file_info
*fi
)
3148 if (strncmp(path
, "/cgroup", 7) == 0)
3149 return cg_release(path
, fi
);
3150 if (strncmp(path
, "/proc", 5) == 0)
3151 return proc_release(path
, fi
);
3156 static int lxcfs_fsync(const char *path
, int datasync
, struct fuse_file_info
*fi
)
3161 int lxcfs_mkdir(const char *path
, mode_t mode
)
3163 if (strncmp(path
, "/cgroup", 7) == 0)
3164 return cg_mkdir(path
, mode
);
3169 int lxcfs_chown(const char *path
, uid_t uid
, gid_t gid
)
3171 if (strncmp(path
, "/cgroup", 7) == 0)
3172 return cg_chown(path
, uid
, gid
);
3178 * cat first does a truncate before doing ops->write. This doesn't
3179 * really make sense for cgroups. So just return 0 always but do
/*
 * FUSE truncate handler.  Paths under /cgroup succeed without doing
 * anything; every other path is rejected with -EINVAL.
 */
int lxcfs_truncate(const char *path, off_t newsize)
{
	return strncmp(path, "/cgroup", 7) == 0 ? 0 : -EINVAL;
}
3189 int lxcfs_rmdir(const char *path
)
3191 if (strncmp(path
, "/cgroup", 7) == 0)
3192 return cg_rmdir(path
);
3196 int lxcfs_chmod(const char *path
, mode_t mode
)
3198 if (strncmp(path
, "/cgroup", 7) == 0)
3199 return cg_chmod(path
, mode
);
3203 const struct fuse_operations lxcfs_ops
= {
3204 .getattr
= lxcfs_getattr
,
3208 .mkdir
= lxcfs_mkdir
,
3210 .rmdir
= lxcfs_rmdir
,
3214 .chmod
= lxcfs_chmod
,
3215 .chown
= lxcfs_chown
,
3216 .truncate
= lxcfs_truncate
,
3221 .release
= lxcfs_release
,
3222 .write
= lxcfs_write
,
3225 .flush
= lxcfs_flush
,
3226 .fsync
= lxcfs_fsync
,
3231 .removexattr
= NULL
,
3233 .opendir
= lxcfs_opendir
,
3234 .readdir
= lxcfs_readdir
,
3235 .releasedir
= lxcfs_releasedir
,
/*
 * Print the command-line synopsis for the program named @me to stderr
 * and terminate the process with exit status 1.
 */
static void usage(const char *me)
{
	fputs("Usage:\n", stderr);
	fputs("\n", stderr);
	fprintf(stderr, "%s mountpoint\n", me);
	fprintf(stderr, "%s -h\n", me);
	exit(1);
}
/*
 * Report whether @w is one of the accepted spellings of the help
 * option: "-h", "--help", "-help" or "help".
 */
static bool is_help(char *w)
{
	static const char *const spellings[] = { "-h", "--help", "-help", "help" };
	size_t i;

	for (i = 0; i < sizeof(spellings) / sizeof(spellings[0]); i++) {
		if (strcmp(w, spellings[i]) == 0)
			return true;
	}

	return false;
}
/*
 * Remove the first occurrence of the argument @which from the
 * NULL-terminated vector @argv (searching from index 1) and decrement
 * *@argcp.  Nothing changes when @which is not present.
 */
void swallow_arg(int *argcp, char *argv[], char *which)
{
	int pos = 1;

	/* find the first matching argument, if any */
	while (argv[pos] && strcmp(argv[pos], which) != 0)
		pos++;
	if (!argv[pos])
		return;

	/* shift the tail, terminating NULL included, one slot to the left */
	do {
		argv[pos] = argv[pos + 1];
		pos++;
	} while (argv[pos]);

	(*argcp)--;
}
/*
 * Remove the option @opt together with its value from the
 * NULL-terminated vector @argv (searching from index 1) and subtract 2
 * from *@argcp.
 *
 * The value following @opt must equal @v; if it does not, the offending
 * value is reported on stderr and the process exits with status 1.
 * Nothing changes when @opt is absent or is the last argument.
 */
void swallow_option(int *argcp, char *argv[], char *opt, char *v)
{
	int i;

	for (i = 1; argv[i]; i++) {
		/* an option needs a value after it */
		if (!argv[i+1])
			continue;
		if (strcmp(argv[i], opt) != 0)
			continue;
		if (strcmp(argv[i+1], v) != 0) {
			/* name what was actually passed, not what was expected */
			fprintf(stderr, "Warning: unexpected fuse option %s\n", argv[i+1]);
			exit(1);
		}
		/* shift the tail, terminating NULL included, two slots left */
		for (; argv[i+1]; i++)
			argv[i] = argv[i+2];
		(*argcp) -= 2;
		return;
	}
}
3301 int main(int argc
, char *argv
[])
3305 * what we pass to fuse_main is:
3306 * argv[0] -s -f -o allow_other,directio argv[1] NULL
3308 int nargs
= 5, cnt
= 0;
3312 /* for travis which runs on 12.04 */
3313 if (glib_check_version (2, 36, 0) != NULL
)
3317 /* accommodate older init scripts */
3318 swallow_arg(&argc
, argv
, "-s");
3319 swallow_arg(&argc
, argv
, "-f");
3320 swallow_option(&argc
, argv
, "-o", "allow_other");
3322 if (argc
== 2 && strcmp(argv
[1], "--version") == 0) {
3323 fprintf(stderr
, "%s\n", VERSION
);
3326 if (argc
!= 2 || is_help(argv
[1]))
3329 newargv
[cnt
++] = argv
[0];
3330 newargv
[cnt
++] = "-f";
3331 newargv
[cnt
++] = "-o";
3332 newargv
[cnt
++] = "allow_other,direct_io,entry_timeout=0.5,attr_timeout=0.5";
3333 newargv
[cnt
++] = argv
[1];
3334 newargv
[cnt
++] = NULL
;
3336 if (!cgfs_setup_controllers())
3339 ret
= fuse_main(nargs
, newargv
, &lxcfs_ops
, NULL
);