]>
Commit | Line | Data |
---|---|---|
db0463bf | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
580fe4df | 2 | |
1f5596dd CB |
3 | #ifndef _GNU_SOURCE |
4 | #define _GNU_SOURCE | |
5 | #endif | |
6 | ||
7 | #ifndef FUSE_USE_VERSION | |
580fe4df | 8 | #define FUSE_USE_VERSION 26 |
1f5596dd CB |
9 | #endif |
10 | ||
11 | #define _FILE_OFFSET_BITS 64 | |
580fe4df CB |
12 | |
13 | #define __STDC_FORMAT_MACROS | |
14 | #include <dirent.h> | |
15 | #include <errno.h> | |
16 | #include <fcntl.h> | |
17 | #include <fuse.h> | |
18 | #include <inttypes.h> | |
19 | #include <libgen.h> | |
20 | #include <pthread.h> | |
21 | #include <sched.h> | |
22 | #include <stdarg.h> | |
23 | #include <stdbool.h> | |
24 | #include <stdint.h> | |
25 | #include <stdio.h> | |
26 | #include <stdlib.h> | |
27 | #include <string.h> | |
28 | #include <time.h> | |
29 | #include <unistd.h> | |
30 | #include <wait.h> | |
31 | #include <linux/magic.h> | |
32 | #include <linux/sched.h> | |
33 | #include <sys/epoll.h> | |
34 | #include <sys/mman.h> | |
35 | #include <sys/mount.h> | |
36 | #include <sys/param.h> | |
37 | #include <sys/socket.h> | |
38 | #include <sys/syscall.h> | |
39 | #include <sys/sysinfo.h> | |
40 | #include <sys/vfs.h> | |
41 | ||
42 | #include "bindings.h" | |
43 | #include "config.h" | |
44 | #include "cgroups/cgroup.h" | |
45 | #include "cgroups/cgroup_utils.h" | |
46 | #include "memory_utils.h" | |
47 | #include "utils.h" | |
48 | ||
/*
 * Name, ownership and mode of a single cgroup entry (file or directory),
 * as captured by cgfs_get_key() from a stat of the entry.
 */
struct cgfs_files {
	char *name;	/* heap-allocated entry name, released by free_key() */
	uint32_t uid;	/* owner uid (st_uid) */
	uint32_t gid;	/* owner gid (st_gid) */
	uint32_t mode;	/* file mode bits (st_mode) */
};
54 | ||
/*
 * Argument bundle for a pid-translation helper.
 * NOTE(review): the users of this struct are outside this part of the file;
 * field meanings beyond the callback comment should be confirmed there.
 */
struct pid_ns_clone_args {
	int *cpipe;
	int sock;
	pid_t tpid;
	/* Wrapped translation routine: pid_from_ns or pid_to_ns. */
	int (*wrapped)(int, pid_t);
};
62 | ||
63 | /* | |
64 | * given /cgroup/freezer/a/b, return "freezer". | |
65 | * the returned char* should NOT be freed. | |
66 | */ | |
67 | static char *pick_controller_from_path(struct fuse_context *fc, const char *path) | |
68 | { | |
69 | const char *p1; | |
70 | char *contr, *slash; | |
71 | ||
72 | if (strlen(path) < 9) { | |
73 | errno = EACCES; | |
74 | return NULL; | |
75 | } | |
76 | if (*(path + 7) != '/') { | |
77 | errno = EINVAL; | |
78 | return NULL; | |
79 | } | |
80 | p1 = path + 8; | |
81 | contr = strdupa(p1); | |
82 | if (!contr) { | |
83 | errno = ENOMEM; | |
84 | return NULL; | |
85 | } | |
86 | slash = strstr(contr, "/"); | |
87 | if (slash) | |
88 | *slash = '\0'; | |
89 | ||
90 | for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++) { | |
91 | if ((*h)->__controllers && strcmp((*h)->__controllers, contr) == 0) | |
92 | return (*h)->__controllers; | |
93 | } | |
94 | errno = ENOENT; | |
95 | return NULL; | |
96 | } | |
97 | ||
/*
 * Locate the cgroup part of "/cgroup/<controller>/<cgroup...>".
 *
 * Returns a pointer into @path just past the '/' that follows the controller
 * name and sets errno to 0. The result may still contain a trailing file
 * (key) name. Returns NULL with errno = EACCES when @path is shorter than
 * 9 characters, or errno = EINVAL when nothing follows the controller.
 */
static const char *find_cgroup_in_path(const char *path)
{
	const char *sep;

	if (strlen(path) < 9) {
		errno = EACCES;
		return NULL;
	}

	/* Skip "/cgroup/" and look for the end of the controller name. */
	sep = strchr(path + 8, '/');
	if (sep == NULL) {
		errno = EINVAL;
		return NULL;
	}

	errno = 0;
	return sep + 1;
}
118 | ||
/*
 * Split the final path element off @cg.
 *
 * @dir  receives a newly allocated copy of @cg truncated at its last '/';
 *       the caller must free it. Without any '/' it is a full copy of @cg.
 * @last receives a pointer to the last '/' inside @cg itself (so the slash
 *       is included and it must not be freed), or NULL when @cg has no '/'.
 */
static void get_cgdir_and_path(const char *cg, char **dir, char **last)
{
	char *copy = NULL;

	/* Allocation failure is not tolerated here: retry until it succeeds. */
	while (!copy)
		copy = strdup(cg);
	*dir = copy;

	*last = strrchr(cg, '/');
	if (*last == NULL)
		return;

	/* The copy contains the same '/', so this lookup cannot fail. */
	*strrchr(copy, '/') = '\0';
}
138 | ||
/*
 * Report whether @f names a directory located under @cgroup in the hierarchy
 * of @controller. Returns false when the controller has no cgroup fd, the
 * path cannot be built, the stat fails, or the entry is not a directory.
 */
static bool is_child_cgroup(const char *controller, const char *cgroup, const char *f)
{
	struct stat st;
	size_t bufsz;
	char *relpath;
	int ctrl_fd, n;

	ctrl_fd = get_cgroup_fd(controller);
	if (ctrl_fd < 0)
		return false;

	/*
	 * The *at() functions need a relative path:
	 * . + /cgroup + / + f + \0
	 */
	bufsz = strlen(cgroup) + strlen(f) + 3;
	relpath = alloca(bufsz);
	n = snprintf(relpath, bufsz, "%s%s/%s", dot_or_empty(cgroup), cgroup, f);
	if (n < 0 || (size_t)n >= bufsz)
		return false;

	if (fstatat(ctrl_fd, relpath, &st, 0) < 0)
		return false;

	return S_ISDIR(st.st_mode);
}
166 | ||
/*
 * Decide whether task @pid may see the cgroup directory @cg of controller
 * @contrl. A task in /a/b/c may see /a and /a/b (its ancestors), /a/b/c
 * itself and anything below it, but not siblings such as /b or /a/c.
 * The root ("/" or "./") is always visible, and a task that lives in the
 * root cgroup may see everything.
 */
static bool caller_may_see_dir(pid_t pid, const char *contrl, const char *cg)
{
	char *pid_cg, *task_cg;
	size_t target_len, task_len;
	bool visible;

	if (strcmp(cg, "/") == 0 || strcmp(cg, "./") == 0)
		return true;

	pid_cg = get_pid_cgroup(pid, contrl);
	if (!pid_cg)
		return false;
	prune_init_slice(pid_cg);

	/* Compare without the leading '/' of the task's cgroup. */
	task_cg = pid_cg + 1;
	target_len = strlen(cg);
	task_len = strlen(task_cg);

	if (task_len == 0) {
		/*
		 * Task sits in the root cgroup. The prefix tests below rely on
		 * a separating '/', which was just skipped, so handle it here.
		 */
		visible = true;
	} else if (strcmp(cg, task_cg) == 0) {
		visible = true;
	} else if (target_len < task_len) {
		/* @cg must be a proper ancestor of the task's cgroup. */
		visible = strncmp(task_cg, cg, target_len) == 0 &&
			  task_cg[target_len] == '/';
	} else if (target_len > task_len) {
		/* @cg must be a descendant of the task's cgroup. */
		visible = strncmp(task_cg, cg, task_len) == 0 &&
			  cg[task_len] == '/';
	} else {
		/* Same length but different name: unrelated cgroup. */
		visible = false;
	}

	free(pid_cg);
	return visible;
}
217 | ||
/*
 * Return the path component of @taskcg that immediately follows @querycg.
 * For example taskcg "a/b/c/d" with querycg "a/b" yields "c". When @querycg
 * is "/" or "./", only the first character of @taskcg is skipped.
 *
 * @taskcg must be strictly longer than @querycg, otherwise an error is
 * logged and NULL is returned. The result is newly allocated and must be
 * freed by the caller; NULL is also returned if the allocation fails.
 */
static char *get_next_cgroup_dir(const char *taskcg, const char *querycg)
{
	size_t skip;
	char *next, *sep;

	if (strlen(taskcg) <= strlen(querycg)) {
		lxcfs_error("%s\n", "I was fed bad input.");
		return NULL;
	}

	if (strcmp(querycg, "/") == 0 || strcmp(querycg, "./") == 0)
		skip = 1;
	else
		skip = strlen(querycg) + 1;

	next = strdup(taskcg + skip);
	if (!next)
		return NULL;

	/* Keep only the first remaining component. */
	sep = strchr(next, '/');
	if (sep)
		*sep = '\0';

	return next;
}
243 | ||
/*
 * Check that the cgroup of task @pid (controller @contrl) is a string prefix
 * of @cg, i.e. that @cg lies at or below the task's own cgroup. A task in
 * /a/b/c/d may only act on things under /a/b/c/d; a task in /a may act on
 * /a/b but not on /b.
 *
 * When the answer is false and @nextcg is non-NULL, *nextcg is set to the
 * result of get_next_cgroup_dir() for the task's cgroup and @cg (possibly
 * NULL); the caller must free it.
 */
static bool caller_is_in_ancestor(pid_t pid, const char *contrl, const char *cg, char **nextcg)
{
	char *task_path, *cmp;
	bool inside;

	task_path = get_pid_cgroup(pid, contrl);
	if (!task_path)
		return false;
	prune_init_slice(task_path);

	/*
	 * Callers pass "/" or "./" (openat()) for the root cgroup and
	 * otherwise a cgroup without a leading '/'. Drop the leading '/' of
	 * the task's cgroup only in the latter case so both sides line up.
	 * TODO (carried over from the original author): the reason for the
	 * one-character skip was questioned and never confirmed.
	 */
	if (*cg == '/' || strncmp(cg, "./", 2) == 0)
		cmp = task_path;
	else
		cmp = task_path + 1;

	inside = strncmp(cmp, cg, strlen(cmp)) == 0;
	if (!inside && nextcg)
		*nextcg = get_next_cgroup_dir(cmp, cg);

	free(task_path);
	return inside;
}
286 | ||
287 | static struct cgfs_files *cgfs_get_key(const char *controller, | |
288 | const char *cgroup, const char *file) | |
289 | { | |
290 | int ret, cfd; | |
291 | size_t len; | |
292 | char *fnam; | |
293 | struct stat sb; | |
294 | struct cgfs_files *newkey; | |
295 | ||
296 | cfd = get_cgroup_fd(controller); | |
297 | if (cfd < 0) | |
298 | return false; | |
299 | ||
300 | if (file && *file == '/') | |
301 | file++; | |
302 | ||
303 | if (file && strchr(file, '/')) | |
304 | return NULL; | |
305 | ||
306 | /* Make sure we pass a relative path to *at() family of functions. | |
307 | * . + /cgroup + / + file + \0 | |
308 | */ | |
309 | len = strlen(cgroup) + 3; | |
310 | if (file) | |
311 | len += strlen(file) + 1; | |
312 | fnam = alloca(len); | |
313 | snprintf(fnam, len, "%s%s%s%s", dot_or_empty(cgroup), cgroup, | |
314 | file ? "/" : "", file ? file : ""); | |
315 | ||
316 | ret = fstatat(cfd, fnam, &sb, 0); | |
317 | if (ret < 0) | |
318 | return NULL; | |
319 | ||
320 | do { | |
321 | newkey = malloc(sizeof(struct cgfs_files)); | |
322 | } while (!newkey); | |
323 | if (file) | |
324 | newkey->name = must_copy_string(file); | |
325 | else if (strrchr(cgroup, '/')) | |
326 | newkey->name = must_copy_string(strrchr(cgroup, '/')); | |
327 | else | |
328 | newkey->name = must_copy_string(cgroup); | |
329 | newkey->uid = sb.st_uid; | |
330 | newkey->gid = sb.st_gid; | |
331 | newkey->mode = sb.st_mode; | |
332 | ||
333 | return newkey; | |
334 | } | |
335 | ||
/*
 * Translate @in_id, valid in the caller's namespace, through the mapping read
 * from @idfile (an open /proc/pid/{u,g}id_map) into the id seen inside that
 * pid's namespace.
 *
 * Returns the mapped id, or -1 (as unsigned) when no range covers @in_id or
 * a range in the file wraps around.
 */
static unsigned int convert_id_to_ns(FILE *idfile, unsigned int in_id)
{
	char line[400];
	unsigned int ns_base;	/* first id of the range inside the namespace */
	unsigned int host_base;	/* first id of the range in the caller's namespace */
	unsigned int range;	/* number of ids covered by the range */

	fseek(idfile, 0L, SEEK_SET);
	while (fgets(line, sizeof(line), idfile)) {
		if (sscanf(line, "%u %u %u\n", &ns_base, &host_base, &range) != 3)
			continue;

		if (host_base + range < host_base || ns_base + range < ns_base) {
			/*
			 * A wrapped range is unexpected in a proc file, so
			 * give up instead of guessing.
			 */
			lxcfs_error("pid wrapparound at entry %u %u %u in %s\n",
				    ns_base, host_base, range, line);
			return -1;
		}

		/*
		 * host_base <= in_id < host_base + range, and neither range
		 * end wraps, so ns_base + (in_id - host_base) cannot wrap.
		 */
		if (in_id >= host_base && in_id < host_base + range)
			return ns_base + (in_id - host_base);
	}

	/* No range matched. */
	return -1;
}
378 | ||
/*
 * Values for the req_ns_root argument of is_privileged_over(): whether the
 * calling uid is required to be root inside its own user namespace.
 */
#define NS_ROOT_REQD true
#define NS_ROOT_OPT false

/* Size of the buffer holding a "/proc/<pid>/uid_map" path. */
#define PROCLEN 100

/*
 * Decide whether @uid, acting from the namespace of @pid, is privileged over
 * files owned by @victim.
 *
 * With @req_ns_root == NS_ROOT_OPT an identical uid is sufficient (uid 1000
 * may act on files owned by uid 1000). Otherwise @uid must map to 0 in
 * /proc/@pid/uid_map and @victim must be mapped there as well.
 */
static bool is_privileged_over(pid_t pid, uid_t uid, uid_t victim, bool req_ns_root)
{
	char map_path[PROCLEN];
	FILE *map;
	bool allowed = false;
	int n;

	if (victim == (uid_t)-1 || uid == (uid_t)-1)
		return false;

	if (!req_ns_root && uid == victim)
		return true;

	n = snprintf(map_path, PROCLEN, "/proc/%d/uid_map", pid);
	if (n < 0 || n >= PROCLEN)
		return false;

	map = fopen(map_path, "re");
	if (!map)
		return false;

	/*
	 * Reject unless the caller is root in its namespace, then reject if
	 * the victim is not mapped into that namespace at all.
	 * XXX (from the original author): the second test may be unnecessary
	 * given that fuse requests arrive with ids already converted.
	 */
	if (convert_id_to_ns(map, uid) == 0 &&
	    convert_id_to_ns(map, victim) != (unsigned int)-1)
		allowed = true;

	fclose(map);
	return allowed;
}
434 | ||
/*
 * Check whether the "other" permission bits of @fmode allow the access mode
 * requested in @req_mode (O_RDONLY, O_WRONLY or O_RDWR). Callers shift the
 * owner or group bits into the "other" position before calling. Any other
 * access mode is refused.
 */
static bool perms_include(int fmode, mode_t req_mode)
{
	mode_t access = req_mode & O_ACCMODE;
	mode_t needed;

	if (access == O_RDONLY)
		needed = S_IROTH;
	else if (access == O_WRONLY)
		needed = S_IWOTH;
	else if (access == O_RDWR)
		needed = S_IROTH | S_IWOTH;
	else
		return false;

	return (fmode & needed) == needed;
}
454 | ||
455 | static void free_key(struct cgfs_files *k) | |
456 | { | |
457 | if (!k) | |
458 | return; | |
459 | free_disarm(k->name); | |
460 | free_disarm(k); | |
461 | } | |
462 | ||
463 | /* | |
464 | * check whether a fuse context may access a cgroup dir or file | |
465 | * | |
466 | * If file is not null, it is a cgroup file to check under cg. | |
467 | * If file is null, then we are checking perms on cg itself. | |
468 | * | |
469 | * For files we can check the mode of the list_keys result. | |
470 | * For cgroups, we must make assumptions based on the files under the | |
471 | * cgroup, because cgmanager doesn't tell us ownership/perms of cgroups | |
472 | * yet. | |
473 | */ | |
474 | static bool fc_may_access(struct fuse_context *fc, const char *contrl, const char *cg, const char *file, mode_t mode) | |
475 | { | |
476 | struct cgfs_files *k = NULL; | |
477 | bool ret = false; | |
478 | ||
479 | k = cgfs_get_key(contrl, cg, file); | |
480 | if (!k) | |
481 | return false; | |
482 | ||
483 | if (is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) { | |
484 | if (perms_include(k->mode >> 6, mode)) { | |
485 | ret = true; | |
486 | goto out; | |
487 | } | |
488 | } | |
489 | if (fc->gid == k->gid) { | |
490 | if (perms_include(k->mode >> 3, mode)) { | |
491 | ret = true; | |
492 | goto out; | |
493 | } | |
494 | } | |
495 | ret = perms_include(k->mode, mode); | |
496 | ||
497 | out: | |
498 | free_key(k); | |
499 | return ret; | |
500 | } | |
501 | ||
502 | int cg_getattr(const char *path, struct stat *sb) | |
503 | { | |
504 | struct timespec now; | |
505 | struct fuse_context *fc = fuse_get_context(); | |
506 | char * cgdir = NULL; | |
507 | char *last = NULL, *path1, *path2; | |
508 | struct cgfs_files *k = NULL; | |
509 | const char *cgroup; | |
510 | const char *controller = NULL; | |
511 | int ret = -ENOENT; | |
512 | ||
513 | ||
514 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
515 | return -EIO; | |
516 | ||
517 | memset(sb, 0, sizeof(struct stat)); | |
518 | ||
519 | if (clock_gettime(CLOCK_REALTIME, &now) < 0) | |
520 | return -EINVAL; | |
521 | ||
522 | sb->st_uid = sb->st_gid = 0; | |
523 | sb->st_atim = sb->st_mtim = sb->st_ctim = now; | |
524 | sb->st_size = 0; | |
525 | ||
526 | if (strcmp(path, "/cgroup") == 0) { | |
527 | sb->st_mode = S_IFDIR | 00755; | |
528 | sb->st_nlink = 2; | |
529 | return 0; | |
530 | } | |
531 | ||
532 | controller = pick_controller_from_path(fc, path); | |
533 | if (!controller) | |
534 | return -errno; | |
535 | cgroup = find_cgroup_in_path(path); | |
536 | if (!cgroup) { | |
537 | /* this is just /cgroup/controller, return it as a dir */ | |
538 | sb->st_mode = S_IFDIR | 00755; | |
539 | sb->st_nlink = 2; | |
540 | return 0; | |
541 | } | |
542 | ||
543 | get_cgdir_and_path(cgroup, &cgdir, &last); | |
544 | ||
545 | if (!last) { | |
546 | path1 = "/"; | |
547 | path2 = cgdir; | |
548 | } else { | |
549 | path1 = cgdir; | |
550 | path2 = last; | |
551 | } | |
552 | ||
553 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
554 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
555 | initpid = fc->pid; | |
556 | /* check that cgcopy is either a child cgroup of cgdir, or listed in its keys. | |
557 | * Then check that caller's cgroup is under path if last is a child | |
558 | * cgroup, or cgdir if last is a file */ | |
559 | ||
560 | if (is_child_cgroup(controller, path1, path2)) { | |
561 | if (!caller_may_see_dir(initpid, controller, cgroup)) { | |
562 | ret = -ENOENT; | |
563 | goto out; | |
564 | } | |
565 | if (!caller_is_in_ancestor(initpid, controller, cgroup, NULL)) { | |
566 | /* this is just /cgroup/controller, return it as a dir */ | |
567 | sb->st_mode = S_IFDIR | 00555; | |
568 | sb->st_nlink = 2; | |
569 | ret = 0; | |
570 | goto out; | |
571 | } | |
572 | if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) { | |
573 | ret = -EACCES; | |
574 | goto out; | |
575 | } | |
576 | ||
577 | // get uid, gid, from '/tasks' file and make up a mode | |
578 | // That is a hack, until cgmanager gains a GetCgroupPerms fn. | |
579 | sb->st_mode = S_IFDIR | 00755; | |
580 | k = cgfs_get_key(controller, cgroup, NULL); | |
581 | if (!k) { | |
582 | sb->st_uid = sb->st_gid = 0; | |
583 | } else { | |
584 | sb->st_uid = k->uid; | |
585 | sb->st_gid = k->gid; | |
586 | } | |
587 | free_key(k); | |
588 | sb->st_nlink = 2; | |
589 | ret = 0; | |
590 | goto out; | |
591 | } | |
592 | ||
593 | if ((k = cgfs_get_key(controller, path1, path2)) != NULL) { | |
594 | sb->st_mode = S_IFREG | k->mode; | |
595 | sb->st_nlink = 1; | |
596 | sb->st_uid = k->uid; | |
597 | sb->st_gid = k->gid; | |
598 | sb->st_size = 0; | |
599 | free_key(k); | |
600 | if (!caller_is_in_ancestor(initpid, controller, path1, NULL)) { | |
601 | ret = -ENOENT; | |
602 | goto out; | |
603 | } | |
604 | ret = 0; | |
605 | } | |
606 | ||
607 | out: | |
608 | free(cgdir); | |
609 | return ret; | |
610 | } | |
611 | ||
612 | /* | |
613 | * Chown all the files in the cgroup directory. We do this when we create a | |
614 | * cgroup on behalf of a user. | |
615 | */ | |
616 | static void chown_all_cgroup_files(const char *dirname, uid_t uid, gid_t gid, int fd) | |
617 | { | |
618 | struct dirent *direntp; | |
619 | char path[MAXPATHLEN]; | |
620 | size_t len; | |
621 | DIR *d; | |
622 | int fd1, ret; | |
623 | ||
624 | len = strlen(dirname); | |
625 | if (len >= MAXPATHLEN) { | |
626 | lxcfs_error("Pathname too long: %s\n", dirname); | |
627 | return; | |
628 | } | |
629 | ||
630 | fd1 = openat(fd, dirname, O_DIRECTORY); | |
631 | if (fd1 < 0) | |
632 | return; | |
633 | ||
634 | d = fdopendir(fd1); | |
635 | if (!d) { | |
636 | lxcfs_error("Failed to open %s\n", dirname); | |
637 | return; | |
638 | } | |
639 | ||
640 | while ((direntp = readdir(d))) { | |
641 | if (!strcmp(direntp->d_name, ".") || !strcmp(direntp->d_name, "..")) | |
642 | continue; | |
643 | ret = snprintf(path, MAXPATHLEN, "%s/%s", dirname, direntp->d_name); | |
644 | if (ret < 0 || ret >= MAXPATHLEN) { | |
645 | lxcfs_error("Pathname too long under %s\n", dirname); | |
646 | continue; | |
647 | } | |
648 | if (fchownat(fd, path, uid, gid, 0) < 0) | |
649 | lxcfs_error("Failed to chown file %s to %u:%u", path, uid, gid); | |
650 | } | |
651 | closedir(d); | |
652 | } | |
653 | ||
/*
 * Create cgroup @cg in the hierarchy of @controller and, unless both @uid
 * and @gid are 0, hand the new directory and its files over to @uid:@gid.
 *
 * Returns 0 on success, -EINVAL when the controller has no cgroup fd, or
 * -errno from the failing mkdirat()/fchownat() call.
 */
static int cgfs_create(const char *controller, const char *cg, uid_t uid, gid_t gid)
{
	int ctrl_fd;
	size_t bufsz;
	char *relpath;

	ctrl_fd = get_cgroup_fd(controller);
	if (ctrl_fd < 0)
		return -EINVAL;

	/*
	 * The *at() functions need a relative path:
	 * . + /cg + \0
	 */
	bufsz = strlen(cg) + 2;
	relpath = alloca(bufsz);
	snprintf(relpath, bufsz, "%s%s", dot_or_empty(cg), cg);

	if (mkdirat(ctrl_fd, relpath, 0755) < 0)
		return -errno;

	if (uid != 0 || gid != 0) {
		if (fchownat(ctrl_fd, relpath, uid, gid, 0) < 0)
			return -errno;

		chown_all_cgroup_files(relpath, uid, gid, ctrl_fd);
	}

	return 0;
}
684 | ||
685 | int cg_mkdir(const char *path, mode_t mode) | |
686 | { | |
687 | struct fuse_context *fc = fuse_get_context(); | |
688 | char *last = NULL, *path1, *cgdir = NULL, *controller, *next = NULL; | |
689 | const char *cgroup; | |
690 | int ret; | |
691 | ||
692 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
693 | return -EIO; | |
694 | ||
695 | controller = pick_controller_from_path(fc, path); | |
696 | if (!controller) | |
697 | return errno == ENOENT ? -EPERM : -errno; | |
698 | ||
699 | cgroup = find_cgroup_in_path(path); | |
700 | if (!cgroup) | |
701 | return -errno; | |
702 | ||
703 | get_cgdir_and_path(cgroup, &cgdir, &last); | |
704 | if (!last) | |
705 | path1 = "/"; | |
706 | else | |
707 | path1 = cgdir; | |
708 | ||
709 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
710 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
711 | initpid = fc->pid; | |
712 | if (!caller_is_in_ancestor(initpid, controller, path1, &next)) { | |
713 | if (!next) | |
714 | ret = -EINVAL; | |
715 | else if (last && strcmp(next, last) == 0) | |
716 | ret = -EEXIST; | |
717 | else | |
718 | ret = -EPERM; | |
719 | goto out; | |
720 | } | |
721 | ||
722 | if (!fc_may_access(fc, controller, path1, NULL, O_RDWR)) { | |
723 | ret = -EACCES; | |
724 | goto out; | |
725 | } | |
726 | if (!caller_is_in_ancestor(initpid, controller, path1, NULL)) { | |
727 | ret = -EACCES; | |
728 | goto out; | |
729 | } | |
730 | ||
731 | ret = cgfs_create(controller, cgroup, fc->uid, fc->gid); | |
732 | ||
733 | out: | |
734 | free(cgdir); | |
735 | free(next); | |
736 | return ret; | |
737 | } | |
738 | ||
739 | static bool recursive_rmdir(const char *dirname, int fd, const int cfd) | |
740 | { | |
741 | struct dirent *direntp; | |
742 | DIR *dir; | |
743 | bool ret = false; | |
744 | char pathname[MAXPATHLEN]; | |
745 | int dupfd; | |
746 | ||
747 | dupfd = dup(fd); // fdopendir() does bad things once it uses an fd. | |
748 | if (dupfd < 0) | |
749 | return false; | |
750 | ||
751 | dir = fdopendir(dupfd); | |
752 | if (!dir) { | |
753 | lxcfs_debug("Failed to open %s: %s.\n", dirname, strerror(errno)); | |
754 | close(dupfd); | |
755 | return false; | |
756 | } | |
757 | ||
758 | while ((direntp = readdir(dir))) { | |
759 | struct stat mystat; | |
760 | int rc; | |
761 | ||
762 | if (!strcmp(direntp->d_name, ".") || | |
763 | !strcmp(direntp->d_name, "..")) | |
764 | continue; | |
765 | ||
766 | rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name); | |
767 | if (rc < 0 || rc >= MAXPATHLEN) { | |
768 | lxcfs_error("%s\n", "Pathname too long."); | |
769 | continue; | |
770 | } | |
771 | ||
772 | rc = fstatat(cfd, pathname, &mystat, AT_SYMLINK_NOFOLLOW); | |
773 | if (rc) { | |
774 | lxcfs_debug("Failed to stat %s: %s.\n", pathname, strerror(errno)); | |
775 | continue; | |
776 | } | |
777 | if (S_ISDIR(mystat.st_mode)) | |
778 | if (!recursive_rmdir(pathname, fd, cfd)) | |
779 | lxcfs_debug("Error removing %s.\n", pathname); | |
780 | } | |
781 | ||
782 | ret = true; | |
783 | if (closedir(dir) < 0) { | |
784 | lxcfs_error("Failed to close directory %s: %s\n", dirname, strerror(errno)); | |
785 | ret = false; | |
786 | } | |
787 | ||
788 | if (unlinkat(cfd, dirname, AT_REMOVEDIR) < 0) { | |
789 | lxcfs_debug("Failed to delete %s: %s.\n", dirname, strerror(errno)); | |
790 | ret = false; | |
791 | } | |
792 | ||
793 | close(dupfd); | |
794 | ||
795 | return ret; | |
796 | } | |
797 | ||
798 | static bool cgfs_remove(const char *controller, const char *cg) | |
799 | { | |
800 | int fd, cfd; | |
801 | size_t len; | |
802 | char *dirnam; | |
803 | bool bret; | |
804 | ||
805 | cfd = get_cgroup_fd(controller); | |
806 | if (cfd < 0) | |
807 | return false; | |
808 | ||
809 | /* Make sure we pass a relative path to *at() family of functions. | |
810 | * . + /cg + \0 | |
811 | */ | |
812 | len = strlen(cg) + 2; | |
813 | dirnam = alloca(len); | |
814 | snprintf(dirnam, len, "%s%s", dot_or_empty(cg), cg); | |
815 | ||
816 | fd = openat(cfd, dirnam, O_DIRECTORY); | |
817 | if (fd < 0) | |
818 | return false; | |
819 | ||
820 | bret = recursive_rmdir(dirnam, fd, cfd); | |
821 | close(fd); | |
822 | return bret; | |
823 | } | |
824 | ||
825 | int cg_rmdir(const char *path) | |
826 | { | |
827 | struct fuse_context *fc = fuse_get_context(); | |
828 | char *last = NULL, *cgdir = NULL, *controller, *next = NULL; | |
829 | const char *cgroup; | |
830 | int ret; | |
831 | ||
832 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
833 | return -EIO; | |
834 | ||
835 | controller = pick_controller_from_path(fc, path); | |
836 | if (!controller) /* Someone's trying to delete "/cgroup". */ | |
837 | return -EPERM; | |
838 | ||
839 | cgroup = find_cgroup_in_path(path); | |
840 | if (!cgroup) /* Someone's trying to delete a controller e.g. "/blkio". */ | |
841 | return -EPERM; | |
842 | ||
843 | get_cgdir_and_path(cgroup, &cgdir, &last); | |
844 | if (!last) { | |
845 | /* Someone's trying to delete a cgroup on the same level as the | |
846 | * "/lxc" cgroup e.g. rmdir "/cgroup/blkio/lxc" or | |
847 | * rmdir "/cgroup/blkio/init.slice". | |
848 | */ | |
849 | ret = -EPERM; | |
850 | goto out; | |
851 | } | |
852 | ||
853 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
854 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
855 | initpid = fc->pid; | |
856 | if (!caller_is_in_ancestor(initpid, controller, cgroup, &next)) { | |
857 | if (!last || (next && (strcmp(next, last) == 0))) | |
858 | ret = -EBUSY; | |
859 | else | |
860 | ret = -ENOENT; | |
861 | goto out; | |
862 | } | |
863 | ||
864 | if (!fc_may_access(fc, controller, cgdir, NULL, O_WRONLY)) { | |
865 | ret = -EACCES; | |
866 | goto out; | |
867 | } | |
868 | if (!caller_is_in_ancestor(initpid, controller, cgroup, NULL)) { | |
869 | ret = -EACCES; | |
870 | goto out; | |
871 | } | |
872 | ||
873 | if (!cgfs_remove(controller, cgroup)) { | |
874 | ret = -EINVAL; | |
875 | goto out; | |
876 | } | |
877 | ||
878 | ret = 0; | |
879 | ||
880 | out: | |
881 | free(cgdir); | |
882 | free(next); | |
883 | return ret; | |
884 | } | |
885 | ||
/*
 * Change the mode of @file inside the hierarchy of @controller.
 * Returns true on success, false when the controller has no cgroup fd or
 * fchmodat() fails.
 */
static bool cgfs_chmod_file(const char *controller, const char *file, mode_t mode)
{
	int ctrl_fd;
	size_t bufsz;
	char *relpath;

	ctrl_fd = get_cgroup_fd(controller);
	if (ctrl_fd < 0)
		return false;

	/*
	 * The *at() functions need a relative path:
	 * . + /file + \0
	 */
	bufsz = strlen(file) + 2;
	relpath = alloca(bufsz);
	snprintf(relpath, bufsz, "%s%s", dot_or_empty(file), file);

	return fchmodat(ctrl_fd, relpath, mode, 0) >= 0;
}
906 | ||
907 | int cg_chmod(const char *path, mode_t mode) | |
908 | { | |
909 | struct fuse_context *fc = fuse_get_context(); | |
910 | char * cgdir = NULL, *last = NULL, *path1, *path2, *controller; | |
911 | struct cgfs_files *k = NULL; | |
912 | const char *cgroup; | |
913 | int ret; | |
914 | ||
915 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
916 | return -EIO; | |
917 | ||
918 | if (strcmp(path, "/cgroup") == 0) | |
919 | return -EPERM; | |
920 | ||
921 | controller = pick_controller_from_path(fc, path); | |
922 | if (!controller) | |
923 | return errno == ENOENT ? -EPERM : -errno; | |
924 | ||
925 | cgroup = find_cgroup_in_path(path); | |
926 | if (!cgroup) | |
927 | /* this is just /cgroup/controller */ | |
928 | return -EPERM; | |
929 | ||
930 | get_cgdir_and_path(cgroup, &cgdir, &last); | |
931 | ||
932 | if (!last) { | |
933 | path1 = "/"; | |
934 | path2 = cgdir; | |
935 | } else { | |
936 | path1 = cgdir; | |
937 | path2 = last; | |
938 | } | |
939 | ||
940 | if (is_child_cgroup(controller, path1, path2)) { | |
941 | // get uid, gid, from '/tasks' file and make up a mode | |
942 | // That is a hack, until cgmanager gains a GetCgroupPerms fn. | |
943 | k = cgfs_get_key(controller, cgroup, "tasks"); | |
944 | ||
945 | } else | |
946 | k = cgfs_get_key(controller, path1, path2); | |
947 | ||
948 | if (!k) { | |
949 | ret = -EINVAL; | |
950 | goto out; | |
951 | } | |
952 | ||
953 | /* | |
954 | * This being a fuse request, the uid and gid must be valid | |
955 | * in the caller's namespace. So we can just check to make | |
956 | * sure that the caller is root in his uid, and privileged | |
957 | * over the file's current owner. | |
958 | */ | |
959 | if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_OPT)) { | |
960 | ret = -EPERM; | |
961 | goto out; | |
962 | } | |
963 | ||
964 | if (!cgfs_chmod_file(controller, cgroup, mode)) { | |
965 | ret = -EINVAL; | |
966 | goto out; | |
967 | } | |
968 | ||
969 | ret = 0; | |
970 | out: | |
971 | free_key(k); | |
972 | free(cgdir); | |
973 | return ret; | |
974 | } | |
975 | ||
/*
 * Return 1 when @path, resolved relative to the directory fd @fd, exists and
 * is a directory, and 0 otherwise (including when the stat fails).
 */
static int is_dir(const char *path, int fd)
{
	struct stat statbuf;
	/*
	 * The last argument of fstatat() is a flags mask, not a descriptor.
	 * Passing the fd there makes the call fail for most fd values.
	 */
	int ret = fstatat(fd, path, &statbuf, 0);

	if (ret == 0 && S_ISDIR(statbuf.st_mode))
		return 1;
	return 0;
}
984 | ||
/*
 * Chown the "tasks" and "cgroup.procs" files inside @dirname (relative to
 * the directory fd @fd) to @uid:@gid. Stops at the first failure and returns
 * -errno; returns 0 when both files were changed.
 */
static int chown_tasks_files(const char *dirname, uid_t uid, gid_t gid, int fd)
{
	/* Sized for the longer of the two names plus the terminating NUL. */
	size_t bufsz = strlen(dirname) + strlen("/cgroup.procs") + 1;
	char *entry = alloca(bufsz);

	snprintf(entry, bufsz, "%s/tasks", dirname);
	if (fchownat(fd, entry, uid, gid, 0) != 0)
		return -errno;

	snprintf(entry, bufsz, "%s/cgroup.procs", dirname);
	if (fchownat(fd, entry, uid, gid, 0) != 0)
		return -errno;

	return 0;
}
1000 | ||
/*
 * Chown @file inside the hierarchy of @controller to @uid:@gid. When @file
 * is a directory, its "tasks" and "cgroup.procs" files are chowned as well.
 *
 * Returns 0 on success, -EINVAL when the controller has no cgroup fd, or
 * -errno from the failing fchownat() call.
 */
static int cgfs_chown_file(const char *controller, const char *file, uid_t uid,
			   gid_t gid)
{
	int cfd;
	size_t len;
	char *pathname;

	cfd = get_cgroup_fd(controller);
	if (cfd < 0)
		/* Must be a negative errno; 0 would be read as success. */
		return -EINVAL;

	/* Make sure we pass a relative path to *at() family of functions.
	 * . + /file + \0
	 */
	len = strlen(file) + 2;
	pathname = alloca(len);
	snprintf(pathname, len, "%s%s", dot_or_empty(file), file);
	if (fchownat(cfd, pathname, uid, gid, 0) < 0)
		return -errno;

	if (is_dir(pathname, cfd))
		return chown_tasks_files(pathname, uid, gid, cfd);

	return 0;
}
1026 | ||
1027 | int cg_chown(const char *path, uid_t uid, gid_t gid) | |
1028 | { | |
1029 | struct fuse_context *fc = fuse_get_context(); | |
1030 | char *cgdir = NULL, *last = NULL, *path1, *path2, *controller; | |
1031 | struct cgfs_files *k = NULL; | |
1032 | const char *cgroup; | |
1033 | int ret; | |
1034 | ||
1035 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
1036 | return -EIO; | |
1037 | ||
1038 | if (strcmp(path, "/cgroup") == 0) | |
1039 | return -EPERM; | |
1040 | ||
1041 | controller = pick_controller_from_path(fc, path); | |
1042 | if (!controller) | |
1043 | return errno == ENOENT ? -EPERM : -errno; | |
1044 | ||
1045 | cgroup = find_cgroup_in_path(path); | |
1046 | if (!cgroup) | |
1047 | /* this is just /cgroup/controller */ | |
1048 | return -EPERM; | |
1049 | ||
1050 | get_cgdir_and_path(cgroup, &cgdir, &last); | |
1051 | ||
1052 | if (!last) { | |
1053 | path1 = "/"; | |
1054 | path2 = cgdir; | |
1055 | } else { | |
1056 | path1 = cgdir; | |
1057 | path2 = last; | |
1058 | } | |
1059 | ||
1060 | if (is_child_cgroup(controller, path1, path2)) { | |
1061 | // get uid, gid, from '/tasks' file and make up a mode | |
1062 | // That is a hack, until cgmanager gains a GetCgroupPerms fn. | |
1063 | k = cgfs_get_key(controller, cgroup, "tasks"); | |
1064 | ||
1065 | } else | |
1066 | k = cgfs_get_key(controller, path1, path2); | |
1067 | ||
1068 | if (!k) { | |
1069 | ret = -EINVAL; | |
1070 | goto out; | |
1071 | } | |
1072 | ||
1073 | /* | |
1074 | * This being a fuse request, the uid and gid must be valid | |
1075 | * in the caller's namespace. So we can just check to make | |
1076 | * sure that the caller is root in his uid, and privileged | |
1077 | * over the file's current owner. | |
1078 | */ | |
1079 | if (!is_privileged_over(fc->pid, fc->uid, k->uid, NS_ROOT_REQD)) { | |
1080 | ret = -EACCES; | |
1081 | goto out; | |
1082 | } | |
1083 | ||
1084 | ret = cgfs_chown_file(controller, cgroup, uid, gid); | |
1085 | ||
1086 | out: | |
1087 | free_key(k); | |
1088 | free(cgdir); | |
1089 | ||
1090 | return ret; | |
1091 | } | |
1092 | ||
1093 | int cg_open(const char *path, struct fuse_file_info *fi) | |
1094 | { | |
1095 | const char *cgroup; | |
1096 | char *last = NULL, *path1, *path2, * cgdir = NULL, *controller; | |
1097 | struct cgfs_files *k = NULL; | |
1098 | struct file_info *file_info; | |
1099 | struct fuse_context *fc = fuse_get_context(); | |
1100 | int ret; | |
1101 | ||
1102 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
1103 | return -EIO; | |
1104 | ||
1105 | controller = pick_controller_from_path(fc, path); | |
1106 | if (!controller) | |
1107 | return -errno; | |
1108 | cgroup = find_cgroup_in_path(path); | |
1109 | if (!cgroup) | |
1110 | return -errno; | |
1111 | ||
1112 | get_cgdir_and_path(cgroup, &cgdir, &last); | |
1113 | if (!last) { | |
1114 | path1 = "/"; | |
1115 | path2 = cgdir; | |
1116 | } else { | |
1117 | path1 = cgdir; | |
1118 | path2 = last; | |
1119 | } | |
1120 | ||
1121 | k = cgfs_get_key(controller, path1, path2); | |
1122 | if (!k) { | |
1123 | ret = -EINVAL; | |
1124 | goto out; | |
1125 | } | |
1126 | free_key(k); | |
1127 | ||
1128 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
1129 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
1130 | initpid = fc->pid; | |
1131 | if (!caller_may_see_dir(initpid, controller, path1)) { | |
1132 | ret = -ENOENT; | |
1133 | goto out; | |
1134 | } | |
1135 | if (!fc_may_access(fc, controller, path1, path2, fi->flags)) { | |
1136 | ret = -EACCES; | |
1137 | goto out; | |
1138 | } | |
1139 | ||
1140 | /* we'll free this at cg_release */ | |
1141 | file_info = malloc(sizeof(*file_info)); | |
1142 | if (!file_info) { | |
1143 | ret = -ENOMEM; | |
1144 | goto out; | |
1145 | } | |
1146 | file_info->controller = must_copy_string(controller); | |
1147 | file_info->cgroup = must_copy_string(path1); | |
1148 | file_info->file = must_copy_string(path2); | |
1149 | file_info->type = LXC_TYPE_CGFILE; | |
1150 | file_info->buf = NULL; | |
1151 | file_info->buflen = 0; | |
1152 | ||
99b183fb | 1153 | fi->fh = PTR_TO_UINT64(file_info); |
580fe4df CB |
1154 | ret = 0; |
1155 | ||
1156 | out: | |
1157 | free(cgdir); | |
1158 | return ret; | |
1159 | } | |
1160 | ||
1161 | #define POLLIN_SET ( EPOLLIN | EPOLLHUP | EPOLLRDHUP ) | |
1162 | ||
580fe4df CB |
/*
 * pid_to_ns - receive pids as ucreds over @sock and write the pid value
 * found in each received ucred back over the same socket. This shifts the
 * pid from the sender's pidns into the pidns this helper runs in.
 *
 * A message carrying the control byte '1' ends the loop. Returns 0 when
 * told to stop or when receiving fails, 1 when writing back a pid fails.
 * @tpid is not used here; it is part of the signature shared with
 * pid_from_ns.
 */
static int pid_to_ns(int sock, pid_t tpid)
{
	struct ucred cred;
	char ctl = '0';

	for (;;) {
		if (!recv_creds(sock, &cred, &ctl))
			break;

		if (ctl == '1')
			return 0;

		if (write(sock, &cred.pid, sizeof(pid_t)) != sizeof(pid_t))
			return 1;
	}

	return 0;
}
1183 | ||
1184 | /* | |
1185 | * pid_ns_clone_wrapper - wraps pid_to_ns or pid_from_ns for usage | |
1186 | * with clone(). This simply writes '1' as ACK back to the parent | |
1187 | * before calling the actual wrapped function. | |
1188 | */ | |
1189 | static int pid_ns_clone_wrapper(void *arg) { | |
1190 | struct pid_ns_clone_args* args = (struct pid_ns_clone_args *) arg; | |
1191 | char b = '1'; | |
1192 | ||
1193 | close(args->cpipe[0]); | |
1194 | if (write(args->cpipe[1], &b, sizeof(char)) < 0) | |
1195 | lxcfs_error("(child): error on write: %s.\n", strerror(errno)); | |
1196 | close(args->cpipe[1]); | |
1197 | return args->wrapped(args->sock, args->tpid); | |
1198 | } | |
1199 | ||
1200 | /* | |
1201 | * pid_to_ns_wrapper: when you setns into a pidns, you yourself remain | |
1202 | * in your old pidns. Only children which you clone will be in the target | |
1203 | * pidns. So the pid_to_ns_wrapper does the setns, then clones a child to | |
1204 | * actually convert pids. | |
1205 | * | |
1206 | * Note: glibc's fork() does not respect pidns, which can lead to failed | |
1207 | * assertions inside glibc (and thus failed forks) if the child's pid in | |
1208 | * the pidns and the parent pid outside are identical. Using clone prevents | |
1209 | * this issue. | |
1210 | */ | |
1211 | static void pid_to_ns_wrapper(int sock, pid_t tpid) | |
1212 | { | |
1213 | int newnsfd = -1, ret, cpipe[2]; | |
1214 | char fnam[100]; | |
1215 | pid_t cpid; | |
1216 | char v; | |
1217 | ||
1218 | ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid); | |
1219 | if (ret < 0 || ret >= sizeof(fnam)) | |
1220 | _exit(1); | |
1221 | newnsfd = open(fnam, O_RDONLY); | |
1222 | if (newnsfd < 0) | |
1223 | _exit(1); | |
1224 | if (setns(newnsfd, 0) < 0) | |
1225 | _exit(1); | |
1226 | close(newnsfd); | |
1227 | ||
1228 | if (pipe(cpipe) < 0) | |
1229 | _exit(1); | |
1230 | ||
1231 | struct pid_ns_clone_args args = { | |
1232 | .cpipe = cpipe, | |
1233 | .sock = sock, | |
1234 | .tpid = tpid, | |
1235 | .wrapped = &pid_to_ns | |
1236 | }; | |
1237 | size_t stack_size = sysconf(_SC_PAGESIZE); | |
1238 | void *stack = alloca(stack_size); | |
1239 | ||
1240 | cpid = clone(pid_ns_clone_wrapper, stack + stack_size, SIGCHLD, &args); | |
1241 | if (cpid < 0) | |
1242 | _exit(1); | |
1243 | ||
1244 | /* Give the child 1 second to be done forking and write its ack. */ | |
1245 | if (!wait_for_sock(cpipe[0], 1)) | |
1246 | _exit(1); | |
1247 | ret = read(cpipe[0], &v, 1); | |
1248 | if (ret != sizeof(char) || v != '1') | |
1249 | _exit(1); | |
1250 | ||
1251 | if (!wait_for_pid(cpid)) | |
1252 | _exit(1); | |
1253 | _exit(0); | |
1254 | } | |
1255 | ||
/*
 * Append @pid, followed by a newline, to the string *src.
 * src: a pointer to a char* in which to append the pid.
 * sz:  the number of characters printed so far, minus trailing \0.
 * asz: the allocated size so far.
 * pid: the pid to append.
 */
static void must_strcat_pid(char **src, size_t *sz, size_t *asz, pid_t pid)
{
	const int value = (int)pid;

	must_strcat(src, sz, asz, "%d\n", value);
}
1267 | ||
1268 | /* | |
1269 | * To read cgroup files with a particular pid, we will setns into the child | |
1270 | * pidns, open a pipe, fork a child - which will be the first to really be in | |
1271 | * the child ns - which does the cgfs_get_value and writes the data to the pipe. | |
1272 | */ | |
1273 | static bool do_read_pids(pid_t tpid, const char *contrl, const char *cg, | |
1274 | const char *file, char **d) | |
1275 | { | |
1276 | int sock[2] = {-1, -1}; | |
1277 | char *tmpdata = NULL; | |
1278 | int ret; | |
1279 | pid_t qpid, cpid = -1; | |
1280 | bool answer = false; | |
1281 | char v = '0'; | |
1282 | struct ucred cred; | |
1283 | size_t sz = 0, asz = 0; | |
1284 | ||
1285 | if (!cgroup_ops->get(cgroup_ops, contrl, cg, file, &tmpdata)) | |
1286 | return false; | |
1287 | ||
1288 | /* | |
1289 | * Now we read the pids from returned data one by one, pass | |
1290 | * them into a child in the target namespace, read back the | |
1291 | * translated pids, and put them into our to-return data | |
1292 | */ | |
1293 | ||
1294 | if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) { | |
1295 | perror("socketpair"); | |
1296 | free(tmpdata); | |
1297 | return false; | |
1298 | } | |
1299 | ||
1300 | cpid = fork(); | |
1301 | if (cpid == -1) | |
1302 | goto out; | |
1303 | ||
1304 | if (!cpid) // child - exits when done | |
1305 | pid_to_ns_wrapper(sock[1], tpid); | |
1306 | ||
1307 | char *ptr = tmpdata; | |
1308 | cred.uid = 0; | |
1309 | cred.gid = 0; | |
1310 | while (sscanf(ptr, "%d\n", &qpid) == 1) { | |
1311 | cred.pid = qpid; | |
1312 | ret = send_creds(sock[0], &cred, v, true); | |
1313 | ||
1314 | if (ret == SEND_CREDS_NOTSK) | |
1315 | goto next; | |
1316 | if (ret == SEND_CREDS_FAIL) | |
1317 | goto out; | |
1318 | ||
1319 | // read converted results | |
1320 | if (!wait_for_sock(sock[0], 2)) { | |
1321 | lxcfs_error("Timed out waiting for pid from child: %s.\n", strerror(errno)); | |
1322 | goto out; | |
1323 | } | |
1324 | if (read(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) { | |
1325 | lxcfs_error("Error reading pid from child: %s.\n", strerror(errno)); | |
1326 | goto out; | |
1327 | } | |
1328 | must_strcat_pid(d, &sz, &asz, qpid); | |
1329 | next: | |
1330 | ptr = strchr(ptr, '\n'); | |
1331 | if (!ptr) | |
1332 | break; | |
1333 | ptr++; | |
1334 | } | |
1335 | ||
1336 | cred.pid = getpid(); | |
1337 | v = '1'; | |
1338 | if (send_creds(sock[0], &cred, v, true) != SEND_CREDS_OK) { | |
1339 | // failed to ask child to exit | |
1340 | lxcfs_error("Failed to ask child to exit: %s.\n", strerror(errno)); | |
1341 | goto out; | |
1342 | } | |
1343 | ||
1344 | answer = true; | |
1345 | ||
1346 | out: | |
1347 | free(tmpdata); | |
1348 | if (cpid != -1) | |
1349 | wait_for_pid(cpid); | |
1350 | if (sock[0] != -1) { | |
1351 | close(sock[0]); | |
1352 | close(sock[1]); | |
1353 | } | |
1354 | return answer; | |
1355 | } | |
1356 | ||
1357 | int cg_read(const char *path, char *buf, size_t size, off_t offset, | |
1358 | struct fuse_file_info *fi) | |
1359 | { | |
1360 | struct fuse_context *fc = fuse_get_context(); | |
99b183fb | 1361 | struct file_info *f = INTTYPE_TO_PTR(fi->fh); |
580fe4df CB |
1362 | struct cgfs_files *k = NULL; |
1363 | char *data = NULL; | |
1364 | int ret, s; | |
1365 | bool r; | |
1366 | ||
1367 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
1368 | return -EIO; | |
1369 | ||
1370 | if (f->type != LXC_TYPE_CGFILE) { | |
1371 | lxcfs_error("%s\n", "Internal error: directory cache info used in cg_read."); | |
1372 | return -EIO; | |
1373 | } | |
1374 | ||
1375 | if (offset) | |
1376 | return 0; | |
1377 | ||
1378 | if (!f->controller) | |
1379 | return -EINVAL; | |
1380 | ||
1381 | if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) { | |
1382 | return -EINVAL; | |
1383 | } | |
1384 | free_key(k); | |
1385 | ||
1386 | ||
1387 | if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_RDONLY)) { | |
1388 | ret = -EACCES; | |
1389 | goto out; | |
1390 | } | |
1391 | ||
1392 | if (strcmp(f->file, "tasks") == 0 || | |
1393 | strcmp(f->file, "/tasks") == 0 || | |
1394 | strcmp(f->file, "/cgroup.procs") == 0 || | |
1395 | strcmp(f->file, "cgroup.procs") == 0) | |
1396 | // special case - we have to translate the pids | |
1397 | r = do_read_pids(fc->pid, f->controller, f->cgroup, f->file, &data); | |
1398 | else | |
1399 | r = cgroup_ops->get(cgroup_ops, f->controller, f->cgroup, f->file, &data); | |
1400 | ||
1401 | if (!r) { | |
1402 | ret = -EINVAL; | |
1403 | goto out; | |
1404 | } | |
1405 | ||
1406 | if (!data) { | |
1407 | ret = 0; | |
1408 | goto out; | |
1409 | } | |
1410 | s = strlen(data); | |
1411 | if (s > size) | |
1412 | s = size; | |
1413 | memcpy(buf, data, s); | |
1414 | if (s > 0 && s < size && data[s-1] != '\n') | |
1415 | buf[s++] = '\n'; | |
1416 | ||
1417 | ret = s; | |
1418 | ||
1419 | out: | |
1420 | free(data); | |
1421 | return ret; | |
1422 | } | |
1423 | ||
1424 | int cg_opendir(const char *path, struct fuse_file_info *fi) | |
1425 | { | |
1426 | struct fuse_context *fc = fuse_get_context(); | |
1427 | const char *cgroup; | |
1428 | struct file_info *dir_info; | |
1429 | char *controller = NULL; | |
1430 | ||
1431 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
1432 | return -EIO; | |
1433 | ||
1434 | if (strcmp(path, "/cgroup") == 0) { | |
1435 | cgroup = NULL; | |
1436 | controller = NULL; | |
1437 | } else { | |
1438 | // return list of keys for the controller, and list of child cgroups | |
1439 | controller = pick_controller_from_path(fc, path); | |
1440 | if (!controller) | |
1441 | return -errno; | |
1442 | ||
1443 | cgroup = find_cgroup_in_path(path); | |
1444 | if (!cgroup) { | |
1445 | /* this is just /cgroup/controller, return its contents */ | |
1446 | cgroup = "/"; | |
1447 | } | |
1448 | } | |
1449 | ||
1450 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
1451 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
1452 | initpid = fc->pid; | |
1453 | if (cgroup) { | |
1454 | if (!caller_may_see_dir(initpid, controller, cgroup)) | |
1455 | return -ENOENT; | |
1456 | if (!fc_may_access(fc, controller, cgroup, NULL, O_RDONLY)) | |
1457 | return -EACCES; | |
1458 | } | |
1459 | ||
1460 | /* we'll free this at cg_releasedir */ | |
1461 | dir_info = malloc(sizeof(*dir_info)); | |
1462 | if (!dir_info) | |
1463 | return -ENOMEM; | |
1464 | dir_info->controller = must_copy_string(controller); | |
1465 | dir_info->cgroup = must_copy_string(cgroup); | |
1466 | dir_info->type = LXC_TYPE_CGDIR; | |
1467 | dir_info->buf = NULL; | |
1468 | dir_info->file = NULL; | |
1469 | dir_info->buflen = 0; | |
1470 | ||
99b183fb | 1471 | fi->fh = PTR_TO_UINT64(dir_info); |
580fe4df CB |
1472 | return 0; |
1473 | } | |
1474 | ||
/*
 * FUSE release handler for cgroup files: hands @fi to
 * do_release_file_info() and always reports success. @path is unused.
 */
int cg_release(const char *path, struct fuse_file_info *fi)
{
	(void)path;

	do_release_file_info(fi);
	return 0;
}
1480 | ||
/*
 * FUSE releasedir handler for cgroup directories: hands @fi to
 * do_release_file_info() and always reports success. @path is unused.
 */
int cg_releasedir(const char *path, struct fuse_file_info *fi)
{
	(void)path;

	do_release_file_info(fi);
	return 0;
}
1486 | ||
/*
 * Open "cgroup.procs" of @cgroup below the mount of @controller for
 * writing.
 *
 * Returns a stdio stream the caller must fclose(), or NULL on failure.
 */
static FILE *open_pids_file(const char *controller, const char *cgroup)
{
	int fd, cfd;
	size_t len;
	char *pathname;
	FILE *f;

	cfd = get_cgroup_fd(controller);
	if (cfd < 0)
		return NULL;

	/* Make sure we pass a relative path to *at() family of functions.
	 * . + /cgroup + / "cgroup.procs" + \0
	 */
	len = strlen(cgroup) + strlen("cgroup.procs") + 3;
	pathname = alloca(len);
	snprintf(pathname, len, "%s%s/cgroup.procs", dot_or_empty(cgroup), cgroup);

	fd = openat(cfd, pathname, O_WRONLY);
	if (fd < 0)
		return NULL;

	f = fdopen(fd, "w");
	if (!f)
		/* fdopen() does not take ownership on failure; avoid leaking fd. */
		close(fd);

	return f;
}
1510 | ||
1511 | static int pid_from_ns(int sock, pid_t tpid) | |
1512 | { | |
1513 | pid_t vpid; | |
1514 | struct ucred cred; | |
1515 | char v; | |
1516 | int ret; | |
1517 | ||
1518 | cred.uid = 0; | |
1519 | cred.gid = 0; | |
1520 | while (1) { | |
1521 | if (!wait_for_sock(sock, 2)) { | |
1522 | lxcfs_error("%s\n", "Timeout reading from parent."); | |
1523 | return 1; | |
1524 | } | |
1525 | if ((ret = read(sock, &vpid, sizeof(pid_t))) != sizeof(pid_t)) { | |
1526 | lxcfs_error("Bad read from parent: %s.\n", strerror(errno)); | |
1527 | return 1; | |
1528 | } | |
1529 | if (vpid == -1) // done | |
1530 | break; | |
1531 | v = '0'; | |
1532 | cred.pid = vpid; | |
1533 | if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK) { | |
1534 | v = '1'; | |
1535 | cred.pid = getpid(); | |
1536 | if (send_creds(sock, &cred, v, false) != SEND_CREDS_OK) | |
1537 | return 1; | |
1538 | } | |
1539 | } | |
1540 | return 0; | |
1541 | } | |
1542 | ||
1543 | static void pid_from_ns_wrapper(int sock, pid_t tpid) | |
1544 | { | |
1545 | int newnsfd = -1, ret, cpipe[2]; | |
1546 | char fnam[100]; | |
1547 | pid_t cpid; | |
1548 | char v; | |
1549 | ||
1550 | ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", tpid); | |
1551 | if (ret < 0 || ret >= sizeof(fnam)) | |
1552 | _exit(1); | |
1553 | newnsfd = open(fnam, O_RDONLY); | |
1554 | if (newnsfd < 0) | |
1555 | _exit(1); | |
1556 | if (setns(newnsfd, 0) < 0) | |
1557 | _exit(1); | |
1558 | close(newnsfd); | |
1559 | ||
1560 | if (pipe(cpipe) < 0) | |
1561 | _exit(1); | |
1562 | ||
1563 | struct pid_ns_clone_args args = { | |
1564 | .cpipe = cpipe, | |
1565 | .sock = sock, | |
1566 | .tpid = tpid, | |
1567 | .wrapped = &pid_from_ns | |
1568 | }; | |
1569 | size_t stack_size = sysconf(_SC_PAGESIZE); | |
1570 | void *stack = alloca(stack_size); | |
1571 | ||
1572 | cpid = clone(pid_ns_clone_wrapper, stack + stack_size, SIGCHLD, &args); | |
1573 | if (cpid < 0) | |
1574 | _exit(1); | |
1575 | ||
1576 | // give the child 1 second to be done forking and | |
1577 | // write its ack | |
1578 | if (!wait_for_sock(cpipe[0], 1)) | |
1579 | _exit(1); | |
1580 | ret = read(cpipe[0], &v, 1); | |
1581 | if (ret != sizeof(char) || v != '1') | |
1582 | _exit(1); | |
1583 | ||
1584 | if (!wait_for_pid(cpid)) | |
1585 | _exit(1); | |
1586 | _exit(0); | |
1587 | } | |
1588 | ||
/*
 * get_pid_creds: get the real uid and gid of @pid from
 * /proc/<pid>/status.
 * (XXX should we use euid here?)
 *
 * *uid and *gid are preset to -1 and keep that value when the status file
 * cannot be opened or the corresponding line is missing or cannot be
 * parsed. Only the first number on the "Uid:" / "Gid:" line is used.
 */
static void get_pid_creds(pid_t pid, uid_t *uid, gid_t *gid)
{
	char line[400];
	uid_t u;
	gid_t g;
	FILE *f;
	int ret;

	*uid = -1;
	*gid = -1;

	ret = snprintf(line, sizeof(line), "/proc/%d/status", pid);
	if (ret < 0 || (size_t)ret >= sizeof(line))
		return;

	f = fopen(line, "re");
	if (!f) {
		lxcfs_error("Error opening %s: %s\n", line, strerror(errno));
		return;
	}

	while (fgets(line, sizeof(line), f)) {
		if (strncmp(line, "Uid:", 4) == 0) {
			if (sscanf(line + 4, "%u", &u) != 1) {
				/* pid_t is signed, so print it with %d. */
				lxcfs_error("bad uid line for pid %d\n", pid);
				fclose(f);
				return;
			}
			*uid = u;
		} else if (strncmp(line, "Gid:", 4) == 0) {
			if (sscanf(line + 4, "%u", &g) != 1) {
				lxcfs_error("bad gid line for pid %d\n", pid);
				fclose(f);
				return;
			}
			*gid = g;
		}
	}

	fclose(f);
}
1627 | ||
/*
 * Translate host @uid through /proc/<pid>/uid_map.
 *
 * The converted id is stored in *answer. Returns true when the map could
 * be opened and the conversion result is not -1, false otherwise.
 */
static bool hostuid_to_ns(uid_t uid, pid_t pid, uid_t *answer)
{
	char map_path[400];
	FILE *map;

	sprintf(map_path, "/proc/%d/uid_map", pid);
	map = fopen(map_path, "re");
	if (!map)
		return false;

	*answer = convert_id_to_ns(map, uid);
	fclose(map);

	return *answer != -1;
}
1649 | ||
/*
 * May the requestor @r move victim @v to a new cgroup?
 * This is allowed if
 *   . they are the same task
 *   . @r_uid is 0
 *   . they are owned by the same uid, or
 *   . @r_uid maps to 0 in @r's uid map and @v's uid is mapped there too.
 */
static bool may_move_pid(pid_t r, uid_t r_uid, pid_t v)
{
	uid_t v_uid, mapped;
	gid_t v_gid;

	if (r == v || r_uid == 0)
		return true;

	get_pid_creds(v, &v_uid, &v_gid);
	if (v_uid == r_uid)
		return true;

	/* The requestor has to be root inside its own user namespace... */
	if (!hostuid_to_ns(r_uid, r, &mapped) || mapped != 0)
		return false;

	/* ...and the victim's uid has to be mapped into that namespace. */
	return hostuid_to_ns(v_uid, r, &mapped);
}
1675 | ||
/*
 * Move the pids listed in @buf (as seen from the pid namespace of @tpid)
 * into cgroup @cg of controller @contrl.
 *
 * A forked helper enters @tpid's pid namespace. Each pid from @buf is
 * written to it over a socketpair and comes back as a ucred, i.e. as the
 * pid known in our own namespace. If may_move_pid() allows it, that pid is
 * written to the cgroup's "cgroup.procs" file.
 *
 * @tuid is the uid of the requestor and is used for the permission check.
 * @file is not used; open_pids_file() always opens "cgroup.procs".
 *
 * Returns true only if every pid was permitted and written successfully.
 */
static bool do_write_pids(pid_t tpid, uid_t tuid, const char *contrl,
			  const char *cg, const char *file, const char *buf)
{
	int sock[2] = {-1, -1};
	pid_t qpid, cpid = -1;
	FILE *pids_file = NULL;
	bool answer = false, fail = false;

	pids_file = open_pids_file(contrl, cg);
	if (!pids_file)
		return false;

	/*
	 * write the pids to a socket, have helper in writer's pidns
	 * call movepid for us
	 */
	if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
		perror("socketpair");
		goto out;
	}

	cpid = fork();
	if (cpid == -1)
		goto out;

	if (!cpid) { /* child - never returns from the wrapper */
		fclose(pids_file);
		pid_from_ns_wrapper(sock[1], tpid);
	}

	const char *ptr = buf;
	while (sscanf(ptr, "%d", &qpid) == 1) {
		struct ucred cred;
		char v;

		if (write(sock[0], &qpid, sizeof(qpid)) != sizeof(qpid)) {
			lxcfs_error("Error writing pid to child: %s.\n", strerror(errno));
			goto out;
		}

		if (recv_creds(sock[0], &cred, &v)) {
			if (v == '0') {
				if (!may_move_pid(tpid, tuid, cred.pid)) {
					fail = true;
					break;
				}

				/*
				 * Flush after every pid. Without the flush, the
				 * pids pile up in the stdio buffer and reach the
				 * kernel concatenated ("12" and "34" become
				 * "1234") in one write at fclose() time. Stop at
				 * the first failure so that no stale data is left
				 * in the buffer for later pids to be appended to.
				 */
				if (fprintf(pids_file, "%d", (int) cred.pid) < 0 ||
				    fflush(pids_file) != 0) {
					fail = true;
					break;
				}
			}
		}

		ptr = strchr(ptr, '\n');
		if (!ptr)
			break;
		ptr++;
	}

	/* Done with the list (successfully or not): ask the helper to exit. */
	qpid = -1;
	if (write(sock[0], &qpid ,sizeof(qpid)) != sizeof(qpid))
		lxcfs_error("%s\n", "Warning: failed to ask child to exit.");

	if (!fail)
		answer = true;

out:
	if (cpid != -1)
		wait_for_pid(cpid);
	if (sock[0] != -1) {
		close(sock[0]);
		close(sock[1]);
	}
	if (pids_file) {
		if (fclose(pids_file) != 0)
			answer = false;
	}
	return answer;
}
1754 | ||
/*
 * Write @string to the already opened file descriptor @fd. @fnam is only
 * used in error messages.
 *
 * Ownership of @fd passes to this function: it is closed on every path,
 * including failure. Returns true if the whole string was written and the
 * stream could be closed without error.
 */
static bool write_string(const char *fnam, const char *string, int fd)
{
	FILE *f;
	size_t len, ret;

	f = fdopen(fd, "w");
	if (!f) {
		/* fdopen() leaves fd open on failure; don't leak it. */
		close(fd);
		return false;
	}

	len = strlen(string);
	ret = fwrite(string, 1, len, f);
	if (ret != len) {
		lxcfs_error("%s - Error writing \"%s\" to \"%s\"\n",
			    strerror(errno), string, fnam);
		fclose(f);
		return false;
	}

	/* fclose() flushes, so a failing write can surface here as well. */
	if (fclose(f) < 0) {
		lxcfs_error("%s - Failed to close \"%s\"\n", strerror(errno), fnam);
		return false;
	}

	return true;
}
1780 | ||
/*
 * Write @value into @file of @cgroup below the mount of @controller.
 *
 * Returns true on success, false if the controller fd is unavailable, the
 * path cannot be built, the file cannot be opened or the write fails.
 */
static bool cgfs_set_value(const char *controller, const char *cgroup,
			   const char *file, const char *value)
{
	int cfd, fd, n;
	size_t bufsz;
	char *relpath;

	cfd = get_cgroup_fd(controller);
	if (cfd < 0)
		return false;

	/* The *at() functions need a relative path:
	 * . + /cgroup + / + file + \0
	 */
	bufsz = strlen(cgroup) + strlen(file) + 3;
	relpath = alloca(bufsz);
	n = snprintf(relpath, bufsz, "%s%s/%s", dot_or_empty(cgroup), cgroup, file);
	if (n < 0 || (size_t)n >= bufsz)
		return false;

	fd = openat(cfd, relpath, O_WRONLY);
	return fd < 0 ? false : write_string(relpath, value, fd);
}
1807 | ||
1808 | int cg_write(const char *path, const char *buf, size_t size, off_t offset, | |
1809 | struct fuse_file_info *fi) | |
1810 | { | |
1811 | struct fuse_context *fc = fuse_get_context(); | |
1812 | char *localbuf = NULL; | |
1813 | struct cgfs_files *k = NULL; | |
99b183fb | 1814 | struct file_info *f = INTTYPE_TO_PTR(fi->fh); |
580fe4df CB |
1815 | bool r; |
1816 | ||
1817 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
1818 | return -EIO; | |
1819 | ||
1820 | if (f->type != LXC_TYPE_CGFILE) { | |
1821 | lxcfs_error("%s\n", "Internal error: directory cache info used in cg_write."); | |
1822 | return -EIO; | |
1823 | } | |
1824 | ||
1825 | if (offset) | |
1826 | return 0; | |
1827 | ||
1828 | localbuf = alloca(size+1); | |
1829 | localbuf[size] = '\0'; | |
1830 | memcpy(localbuf, buf, size); | |
1831 | ||
1832 | if ((k = cgfs_get_key(f->controller, f->cgroup, f->file)) == NULL) { | |
1833 | size = -EINVAL; | |
1834 | goto out; | |
1835 | } | |
1836 | ||
1837 | if (!fc_may_access(fc, f->controller, f->cgroup, f->file, O_WRONLY)) { | |
1838 | size = -EACCES; | |
1839 | goto out; | |
1840 | } | |
1841 | ||
1842 | if (strcmp(f->file, "tasks") == 0 || | |
1843 | strcmp(f->file, "/tasks") == 0 || | |
1844 | strcmp(f->file, "/cgroup.procs") == 0 || | |
1845 | strcmp(f->file, "cgroup.procs") == 0) | |
1846 | // special case - we have to translate the pids | |
1847 | r = do_write_pids(fc->pid, fc->uid, f->controller, f->cgroup, f->file, localbuf); | |
1848 | else | |
1849 | r = cgfs_set_value(f->controller, f->cgroup, f->file, localbuf); | |
1850 | ||
1851 | if (!r) | |
1852 | size = -EINVAL; | |
1853 | ||
1854 | out: | |
1855 | free_key(k); | |
1856 | return size; | |
1857 | } | |
1858 | ||
1859 | static bool cgfs_iterate_cgroup(const char *controller, const char *cgroup, | |
1860 | bool directories, void ***list, size_t typesize, | |
1861 | void *(*iterator)(const char *, const char *, const char *)) | |
1862 | { | |
1863 | int cfd, fd, ret; | |
1864 | size_t len; | |
1865 | char *cg; | |
1866 | char pathname[MAXPATHLEN]; | |
1867 | size_t sz = 0, asz = 0; | |
1868 | struct dirent *dirent; | |
1869 | DIR *dir; | |
1870 | ||
1871 | cfd = get_cgroup_fd(controller); | |
1872 | *list = NULL; | |
1873 | if (cfd < 0) | |
1874 | return false; | |
1875 | ||
1876 | /* Make sure we pass a relative path to *at() family of functions. */ | |
1877 | len = strlen(cgroup) + 1 /* . */ + 1 /* \0 */; | |
1878 | cg = alloca(len); | |
1879 | ret = snprintf(cg, len, "%s%s", dot_or_empty(cgroup), cgroup); | |
1880 | if (ret < 0 || (size_t)ret >= len) { | |
1881 | lxcfs_error("Pathname too long under %s\n", cgroup); | |
1882 | return false; | |
1883 | } | |
1884 | ||
1885 | fd = openat(cfd, cg, O_DIRECTORY); | |
1886 | if (fd < 0) | |
1887 | return false; | |
1888 | ||
1889 | dir = fdopendir(fd); | |
1890 | if (!dir) | |
1891 | return false; | |
1892 | ||
1893 | while ((dirent = readdir(dir))) { | |
1894 | struct stat mystat; | |
1895 | ||
1896 | if (!strcmp(dirent->d_name, ".") || | |
1897 | !strcmp(dirent->d_name, "..")) | |
1898 | continue; | |
1899 | ||
1900 | ret = snprintf(pathname, MAXPATHLEN, "%s/%s", cg, dirent->d_name); | |
1901 | if (ret < 0 || ret >= MAXPATHLEN) { | |
1902 | lxcfs_error("Pathname too long under %s\n", cg); | |
1903 | continue; | |
1904 | } | |
1905 | ||
1906 | ret = fstatat(cfd, pathname, &mystat, AT_SYMLINK_NOFOLLOW); | |
1907 | if (ret) { | |
1908 | lxcfs_error("Failed to stat %s: %s\n", pathname, strerror(errno)); | |
1909 | continue; | |
1910 | } | |
1911 | if ((!directories && !S_ISREG(mystat.st_mode)) || | |
1912 | (directories && !S_ISDIR(mystat.st_mode))) | |
1913 | continue; | |
1914 | ||
1915 | if (sz+2 >= asz) { | |
1916 | void **tmp; | |
1917 | asz += BATCH_SIZE; | |
1918 | do { | |
1919 | tmp = realloc(*list, asz * typesize); | |
1920 | } while (!tmp); | |
1921 | *list = tmp; | |
1922 | } | |
1923 | (*list)[sz] = (*iterator)(controller, cg, dirent->d_name); | |
1924 | (*list)[sz+1] = NULL; | |
1925 | sz++; | |
1926 | } | |
1927 | if (closedir(dir) < 0) { | |
1928 | lxcfs_error("Failed closedir for %s: %s\n", cgroup, strerror(errno)); | |
1929 | return false; | |
1930 | } | |
1931 | return true; | |
1932 | } | |
1933 | ||
/*
 * Iterator callback for cgfs_list_keys(): look up the key for @dir_entry
 * in @cgroup of @controller. Logs an error and returns NULL when the
 * lookup fails.
 */
static void *make_key_list_entry(const char *controller, const char *cgroup,
				 const char *dir_entry)
{
	struct cgfs_files *key = cgfs_get_key(controller, cgroup, dir_entry);

	if (key)
		return key;

	lxcfs_error("Failed to retrieve files under %s:%s\n",
		    controller, cgroup);
	return NULL;
}
1945 | ||
1946 | static bool cgfs_list_keys(const char *controller, const char *cgroup, | |
1947 | struct cgfs_files ***keys) | |
1948 | { | |
1949 | return cgfs_iterate_cgroup(controller, cgroup, false, (void ***)keys, | |
1950 | sizeof(*keys), &make_key_list_entry); | |
1951 | } | |
1952 | ||
/*
 * Iterator callback for cgfs_list_children(): return a heap copy of
 * @dir_entry. @controller and @cgroup are not used.
 */
static void *make_children_list_entry(const char *controller,
				      const char *cgroup, const char *dir_entry)
{
	(void)controller;
	(void)cgroup;

	return strdup(dir_entry);
}
1958 | ||
1959 | static bool cgfs_list_children(const char *controller, const char *cgroup, | |
1960 | char ***list) | |
1961 | { | |
1962 | return cgfs_iterate_cgroup(controller, cgroup, true, (void ***)list, | |
1963 | sizeof(*list), &make_children_list_entry); | |
1964 | } | |
1965 | ||
/*
 * Release a NULL-terminated array of keys: every element first, then the
 * array itself. A NULL @keys is accepted and ignored.
 */
static void free_keys(struct cgfs_files **keys)
{
	struct cgfs_files **it;

	if (!keys)
		return;

	for (it = keys; *it; it++)
		free_key(*it);

	free_disarm(keys);
}
1976 | ||
1977 | int cg_readdir(const char *path, void *buf, fuse_fill_dir_t filler, | |
1978 | off_t offset, struct fuse_file_info *fi) | |
1979 | { | |
99b183fb | 1980 | struct file_info *d = INTTYPE_TO_PTR(fi->fh); |
580fe4df CB |
1981 | struct cgfs_files **list = NULL; |
1982 | int i, ret; | |
1983 | char *nextcg = NULL; | |
1984 | struct fuse_context *fc = fuse_get_context(); | |
1985 | char **clist = NULL; | |
1986 | ||
1987 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
1988 | return -EIO; | |
1989 | ||
1990 | if (filler(buf, ".", NULL, 0) != 0 || filler(buf, "..", NULL, 0) != 0) | |
1991 | return -EIO; | |
1992 | ||
1993 | if (d->type != LXC_TYPE_CGDIR) { | |
1994 | lxcfs_error("%s\n", "Internal error: file cache info used in readdir."); | |
1995 | return -EIO; | |
1996 | } | |
1997 | if (!d->cgroup && !d->controller) { | |
1998 | /* | |
1999 | * ls /var/lib/lxcfs/cgroup - just show list of controllers. | |
2000 | * This only works with the legacy hierarchy. | |
2001 | */ | |
2002 | for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++) { | |
2003 | if (is_unified_hierarchy(*h)) | |
2004 | continue; | |
2005 | ||
2006 | if ((*h)->__controllers && filler(buf, (*h)->__controllers, NULL, 0)) | |
2007 | return -EIO; | |
2008 | } | |
2009 | ||
2010 | return 0; | |
2011 | } | |
2012 | ||
2013 | if (!cgfs_list_keys(d->controller, d->cgroup, &list)) { | |
2014 | // not a valid cgroup | |
2015 | ret = -EINVAL; | |
2016 | goto out; | |
2017 | } | |
2018 | ||
2019 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
2020 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
2021 | initpid = fc->pid; | |
2022 | if (!caller_is_in_ancestor(initpid, d->controller, d->cgroup, &nextcg)) { | |
2023 | if (nextcg) { | |
2024 | ret = filler(buf, nextcg, NULL, 0); | |
2025 | free(nextcg); | |
2026 | if (ret != 0) { | |
2027 | ret = -EIO; | |
2028 | goto out; | |
2029 | } | |
2030 | } | |
2031 | ret = 0; | |
2032 | goto out; | |
2033 | } | |
2034 | ||
2035 | for (i = 0; list && list[i]; i++) { | |
2036 | if (filler(buf, list[i]->name, NULL, 0) != 0) { | |
2037 | ret = -EIO; | |
2038 | goto out; | |
2039 | } | |
2040 | } | |
2041 | ||
2042 | // now get the list of child cgroups | |
2043 | ||
2044 | if (!cgfs_list_children(d->controller, d->cgroup, &clist)) { | |
2045 | ret = 0; | |
2046 | goto out; | |
2047 | } | |
2048 | if (clist) { | |
2049 | for (i = 0; clist[i]; i++) { | |
2050 | if (filler(buf, clist[i], NULL, 0) != 0) { | |
2051 | ret = -EIO; | |
2052 | goto out; | |
2053 | } | |
2054 | } | |
2055 | } | |
2056 | ret = 0; | |
2057 | ||
2058 | out: | |
2059 | free_keys(list); | |
2060 | if (clist) { | |
2061 | for (i = 0; clist[i]; i++) | |
2062 | free(clist[i]); | |
2063 | free(clist); | |
2064 | } | |
2065 | return ret; | |
2066 | } | |
2067 | ||
2068 | int cg_access(const char *path, int mode) | |
2069 | { | |
2070 | int ret; | |
2071 | const char *cgroup; | |
2072 | char *path1, *path2, *controller; | |
2073 | char *last = NULL, *cgdir = NULL; | |
2074 | struct cgfs_files *k = NULL; | |
2075 | struct fuse_context *fc = fuse_get_context(); | |
2076 | ||
2077 | if (!fc || !cgroup_ops || pure_unified_layout(cgroup_ops)) | |
2078 | return -EIO; | |
2079 | ||
2080 | if (strcmp(path, "/cgroup") == 0) | |
2081 | return 0; | |
2082 | ||
2083 | controller = pick_controller_from_path(fc, path); | |
2084 | if (!controller) | |
2085 | return -errno; | |
2086 | cgroup = find_cgroup_in_path(path); | |
2087 | if (!cgroup) { | |
2088 | // access("/sys/fs/cgroup/systemd", mode) - rx allowed, w not | |
2089 | if ((mode & W_OK) == 0) | |
2090 | return 0; | |
2091 | return -EACCES; | |
2092 | } | |
2093 | ||
2094 | get_cgdir_and_path(cgroup, &cgdir, &last); | |
2095 | if (!last) { | |
2096 | path1 = "/"; | |
2097 | path2 = cgdir; | |
2098 | } else { | |
2099 | path1 = cgdir; | |
2100 | path2 = last; | |
2101 | } | |
2102 | ||
2103 | k = cgfs_get_key(controller, path1, path2); | |
2104 | if (!k) { | |
2105 | if ((mode & W_OK) == 0) | |
2106 | ret = 0; | |
2107 | else | |
2108 | ret = -EACCES; | |
2109 | goto out; | |
2110 | } | |
2111 | free_key(k); | |
2112 | ||
2113 | pid_t initpid = lookup_initpid_in_store(fc->pid); | |
2114 | if (initpid <= 1 || is_shared_pidns(initpid)) | |
2115 | initpid = fc->pid; | |
2116 | if (!caller_may_see_dir(initpid, controller, path1)) { | |
2117 | ret = -ENOENT; | |
2118 | goto out; | |
2119 | } | |
2120 | if (!fc_may_access(fc, controller, path1, path2, mode)) { | |
2121 | ret = -EACCES; | |
2122 | goto out; | |
2123 | } | |
2124 | ||
2125 | ret = 0; | |
2126 | ||
2127 | out: | |
2128 | free(cgdir); | |
2129 | return ret; | |
2130 | } |