]>
Commit | Line | Data |
---|---|---|
cc73685d | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
e3642c43 | 2 | |
d38dd64a CB |
3 | #ifndef _GNU_SOURCE |
4 | #define _GNU_SOURCE 1 | |
5 | #endif | |
7935833c | 6 | #define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */ |
643c1984 | 7 | #include <ctype.h> |
a1e5280d | 8 | #include <dirent.h> |
e3642c43 | 9 | #include <errno.h> |
a1e5280d | 10 | #include <fcntl.h> |
dbaf55a3 | 11 | #include <grp.h> |
7935833c | 12 | #include <inttypes.h> |
a1e5280d | 13 | #include <libgen.h> |
b467714b | 14 | #include <pthread.h> |
39293f22 | 15 | #include <signal.h> |
d983b93c | 16 | #include <stddef.h> |
a1e5280d CB |
17 | #include <stdio.h> |
18 | #include <stdlib.h> | |
61a1d519 | 19 | #include <string.h> |
e3642c43 | 20 | #include <sys/mman.h> |
6e4bb2e0 | 21 | #include <sys/mount.h> |
066210f0 CB |
22 | /* Needs to be after sys/mount.h header */ |
23 | #include <linux/fs.h> | |
a1e5280d CB |
24 | #include <sys/param.h> |
25 | #include <sys/prctl.h> | |
26 | #include <sys/stat.h> | |
9be53773 SH |
27 | #include <sys/types.h> |
28 | #include <sys/wait.h> | |
d38dd64a | 29 | #include <unistd.h> |
e3642c43 | 30 | |
d38dd64a | 31 | #include "config.h" |
e3642c43 | 32 | #include "log.h" |
4fef78bc | 33 | #include "lsm/lsm.h" |
025ed0f3 | 34 | #include "lxclock.h" |
c4382ee2 | 35 | #include "memory_utils.h" |
51d0854c | 36 | #include "namespace.h" |
e3db0162 | 37 | #include "parse.h" |
38e5c2db | 38 | #include "raw_syscalls.h" |
b25291da | 39 | #include "syscall_wrappers.h" |
981f6029 | 40 | #include "utils.h" |
e3642c43 | 41 | |
43f984ea DJ |
42 | #ifndef HAVE_STRLCPY |
43 | #include "include/strlcpy.h" | |
44 | #endif | |
45 | ||
bd583214 DJ |
46 | #ifndef HAVE_STRLCAT |
47 | #include "include/strlcat.h" | |
48 | #endif | |
49 | ||
4928c718 SG |
50 | #ifndef O_PATH |
51 | #define O_PATH 010000000 | |
52 | #endif | |
53 | ||
54 | #ifndef O_NOFOLLOW | |
55 | #define O_NOFOLLOW 00400000 | |
56 | #endif | |
57 | ||
ac2cecc4 | 58 | lxc_log_define(utils, lxc); |
e3642c43 | 59 | |
4295c5de SH |
60 | /* |
61 | * if path is btrfs, tries to remove it and any subvolumes beneath it | |
62 | */ | |
63 | extern bool btrfs_try_remove_subvol(const char *path); | |
64 | ||
41dc7155 | 65 | static int _recursive_rmdir(const char *dirname, dev_t pdev, |
0cc417b2 | 66 | const char *exclude, int level, bool onedev) |
60bf62d4 | 67 | { |
f1258455 CB |
68 | __do_closedir DIR *dir = NULL; |
69 | int failed = 0; | |
70 | bool hadexclude = false; | |
71 | int ret; | |
74f96976 | 72 | struct dirent *direntp; |
d726953a | 73 | char pathname[PATH_MAX]; |
60bf62d4 SH |
74 | |
75 | dir = opendir(dirname); | |
f1258455 CB |
76 | if (!dir) |
77 | return log_error(-1, "Failed to open \"%s\"", dirname); | |
60bf62d4 | 78 | |
74f96976 | 79 | while ((direntp = readdir(dir))) { |
60bf62d4 | 80 | int rc; |
f1258455 | 81 | struct stat mystat; |
60bf62d4 | 82 | |
60bf62d4 SH |
83 | if (!strcmp(direntp->d_name, ".") || |
84 | !strcmp(direntp->d_name, "..")) | |
85 | continue; | |
86 | ||
d726953a CB |
87 | rc = snprintf(pathname, PATH_MAX, "%s/%s", dirname, direntp->d_name); |
88 | if (rc < 0 || rc >= PATH_MAX) { | |
7be6bcd5 | 89 | ERROR("The name of path is too long"); |
f1258455 | 90 | failed = 1; |
60bf62d4 SH |
91 | continue; |
92 | } | |
18aa217b SH |
93 | |
94 | if (!level && exclude && !strcmp(direntp->d_name, exclude)) { | |
95 | ret = rmdir(pathname); | |
96 | if (ret < 0) { | |
f1258455 | 97 | switch (errno) { |
18aa217b | 98 | case ENOTEMPTY: |
7be6bcd5 | 99 | INFO("Not deleting snapshot \"%s\"", pathname); |
18aa217b SH |
100 | hadexclude = true; |
101 | break; | |
102 | case ENOTDIR: | |
103 | ret = unlink(pathname); | |
104 | if (ret) | |
7be6bcd5 | 105 | INFO("Failed to remove \"%s\"", pathname); |
18aa217b SH |
106 | break; |
107 | default: | |
7be6bcd5 | 108 | SYSERROR("Failed to rmdir \"%s\"", pathname); |
18aa217b SH |
109 | failed = 1; |
110 | break; | |
111 | } | |
112 | } | |
7be6bcd5 | 113 | |
18aa217b SH |
114 | continue; |
115 | } | |
116 | ||
60bf62d4 SH |
117 | ret = lstat(pathname, &mystat); |
118 | if (ret) { | |
7be6bcd5 | 119 | SYSERROR("Failed to stat \"%s\"", pathname); |
4295c5de | 120 | failed = 1; |
60bf62d4 SH |
121 | continue; |
122 | } | |
b14fc100 | 123 | |
4295c5de | 124 | if (onedev && mystat.st_dev != pdev) { |
4295c5de | 125 | if (btrfs_try_remove_subvol(pathname)) |
7be6bcd5 | 126 | INFO("Removed btrfs subvolume at \"%s\"", pathname); |
60bf62d4 | 127 | continue; |
4295c5de | 128 | } |
b14fc100 | 129 | |
60bf62d4 | 130 | if (S_ISDIR(mystat.st_mode)) { |
f1258455 CB |
131 | if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0) |
132 | failed = 1; | |
60bf62d4 | 133 | } else { |
066210f0 CB |
134 | ret = unlink(pathname); |
135 | if (ret < 0) { | |
136 | __do_close int fd = -EBADF; | |
137 | ||
138 | fd = open(pathname, O_RDONLY | O_CLOEXEC | O_NONBLOCK); | |
139 | if (fd >= 0) { | |
140 | /* The file might be marked immutable. */ | |
141 | int attr = 0; | |
142 | ret = ioctl(fd, FS_IOC_GETFLAGS, &attr); | |
143 | if (ret < 0) | |
144 | SYSERROR("Failed to retrieve file flags"); | |
145 | attr &= ~FS_IMMUTABLE_FL; | |
146 | ret = ioctl(fd, FS_IOC_SETFLAGS, &attr); | |
147 | if (ret < 0) | |
148 | SYSERROR("Failed to set file flags"); | |
149 | } | |
150 | ||
151 | ret = unlink(pathname); | |
152 | if (ret < 0) { | |
153 | SYSERROR("Failed to delete \"%s\"", pathname); | |
154 | failed = 1; | |
155 | } | |
60bf62d4 SH |
156 | } |
157 | } | |
158 | } | |
159 | ||
4295c5de | 160 | if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) { |
7be6bcd5 | 161 | SYSERROR("Failed to delete \"%s\"", dirname); |
f1258455 | 162 | failed = 1; |
60bf62d4 SH |
163 | } |
164 | ||
4355ab5f | 165 | return failed ? -1 : 0; |
60bf62d4 SH |
166 | } |
167 | ||
f1258455 CB |
168 | /* |
169 | * In overlayfs, st_dev is unreliable. So on overlayfs we don't do the | |
170 | * lxc_rmdir_onedev(). | |
0cc417b2 | 171 | */ |
f1258455 | 172 | static inline bool is_native_overlayfs(const char *path) |
0cc417b2 | 173 | { |
f1258455 CB |
174 | return has_fs_type(path, OVERLAY_SUPER_MAGIC) || |
175 | has_fs_type(path, OVERLAYFS_SUPER_MAGIC); | |
0cc417b2 SH |
176 | } |
177 | ||
/*
 * Recursively delete @path without crossing onto other devices (except on
 * overlayfs, where the device check is disabled). @exclude names a top-level
 * entry that is kept when it is a non-empty directory.
 *
 * Returns 0 on success or if @path does not exist, -1 if there were any
 * failures.
 */
extern int lxc_rmdir_onedev(const char *path, const char *exclude)
{
	struct stat st;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &st) >= 0)
		return _recursive_rmdir(path, st.st_dev, exclude, 0, onedev);

	/* Nothing to delete. */
	if (errno == ENOENT)
		return 0;

	return log_error_errno(-1, errno, "Failed to stat \"%s\"", path);
}
196 | ||
/*
 * Parse @arg as an unsigned 16-bit integer in the given @base.
 * (borrowed from iproute2)
 *
 * @val:  receives the parsed value on success
 * @arg:  string to parse; must be non-empty and consist only of the number
 * @base: numeric base handed to strtoul()
 *
 * Returns 0 on success, -1 if @arg is NULL/empty, has trailing characters,
 * is out of range, or strtoul() reported an error.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	unsigned long res;
	/*
	 * strtoul() may leave the end pointer untouched when it rejects
	 * @base, so start from a defined value.
	 */
	char *ptr = NULL;

	if (!arg || !*arg)
		return -1;

	errno = 0;
	res = strtoul(arg, &ptr, base);
	/* Check errno first: on failure @ptr may not be meaningful. */
	if (errno != 0)
		return -1;

	if (!ptr || ptr == arg || *ptr || res > 0xFFFF)
		return -1;

	*val = res;

	return 0;
}
215 | ||
6099dd5a | 216 | int mkdir_p(const char *dir, mode_t mode) |
1b09f2c0 | 217 | { |
3ce74686 SH |
218 | const char *tmp = dir; |
219 | const char *orig = dir; | |
7be6bcd5 | 220 | |
c5e7a7ac | 221 | do { |
f1258455 | 222 | __do_free char *makeme = NULL; |
6099dd5a | 223 | int ret; |
6099dd5a | 224 | |
860fc865 RW |
225 | dir = tmp + strspn(tmp, "/"); |
226 | tmp = dir + strcspn(dir, "/"); | |
b14fc100 | 227 | |
d74325c4 | 228 | makeme = strndup(orig, dir - orig); |
6099dd5a | 229 | if (!makeme) |
f1258455 | 230 | return ret_set_errno(-1, ENOMEM); |
6099dd5a CB |
231 | |
232 | ret = mkdir(makeme, mode); | |
f1258455 CB |
233 | if (ret < 0 && errno != EEXIST) |
234 | return log_error_errno(-1, errno, "Failed to create directory \"%s\"", makeme); | |
6099dd5a CB |
235 | |
236 | } while (tmp != dir); | |
1b09f2c0 | 237 | |
98663823 | 238 | return 0; |
1b09f2c0 | 239 | } |
2a59a681 | 240 | |
44b9ae4b | 241 | char *get_rundir() |
9e60f51d | 242 | { |
97a696c6 | 243 | char *rundir; |
f1258455 | 244 | size_t len; |
97a696c6 | 245 | const char *homedir; |
9650c735 | 246 | struct stat sb; |
9e60f51d | 247 | |
b14fc100 | 248 | if (stat(RUNTIME_PATH, &sb) < 0) |
9650c735 | 249 | return NULL; |
9650c735 | 250 | |
f1258455 CB |
251 | if (geteuid() == sb.st_uid || getegid() == sb.st_gid) |
252 | return strdup(RUNTIME_PATH); | |
97a696c6 SG |
253 | |
254 | rundir = getenv("XDG_RUNTIME_DIR"); | |
f1258455 CB |
255 | if (rundir) |
256 | return strdup(rundir); | |
97a696c6 | 257 | |
7be6bcd5 | 258 | INFO("XDG_RUNTIME_DIR isn't set in the environment"); |
44b9ae4b | 259 | homedir = getenv("HOME"); |
f1258455 CB |
260 | if (!homedir) |
261 | return log_error(NULL, "HOME isn't set in the environment"); | |
97a696c6 | 262 | |
f1258455 CB |
263 | len = strlen(homedir) + 17; |
264 | rundir = malloc(sizeof(char) * len); | |
b14fc100 | 265 | if (!rundir) |
266 | return NULL; | |
267 | ||
f1258455 | 268 | snprintf(rundir, len, "%s/.cache/lxc/run/", homedir); |
9e60f51d DE |
269 | return rundir; |
270 | } | |
271 | ||
9be53773 SH |
/*
 * Wait for child @pid to terminate, retrying when interrupted by a signal.
 *
 * Returns 0 if the child exited normally with status 0, -1 if waiting
 * failed, the child was terminated abnormally, or it exited non-zero.
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int ret = waitpid(pid, &status, 0);

		if (ret == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (ret == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
c797a220 | 293 | |
39293f22 CB |
/*
 * Wait for the process referred to by @pidfd to exit, retrying when
 * interrupted by a signal.
 *
 * Note the return convention differs from wait_for_pid(): this returns
 * non-zero (1) if the process exited normally with exit code 0, and 0 if
 * waiting failed, the process was killed, or it exited with a non-zero code.
 */
int wait_for_pidfd(int pidfd)
{
	int ret;
	siginfo_t info = {
		.si_signo = 0,
	};

	do {
		ret = waitid(P_PIDFD, pidfd, &info, __WALL | WEXITED);
	} while (ret < 0 && errno == EINTR);
	if (ret < 0)
		return 0;

	/*
	 * waitid() does not report a wait(2)-style status word: si_code says
	 * how the child terminated and si_status holds the plain exit code
	 * (or the signal number). The W*() macros must not be applied to it.
	 */
	return info.si_code == CLD_EXITED && info.si_status == 0;
}
307 | ||
c797a220 CS |
/*
 * Wait for child @pid to terminate, retrying when interrupted by a signal.
 *
 * Returns the raw wait status as filled in by waitpid() (to be inspected
 * with the W*() macros), or -1 if waiting failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;

	for (;;) {
		int ret = waitpid(pid, &status, 0);

		if (ret == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (ret == pid)
			return status;
	}
}
92f023dc | 326 | |
fa2bb6ba SH |
327 | #ifdef HAVE_OPENSSL |
328 | #include <openssl/evp.h> | |
41246cee | 329 | |
f1258455 CB |
330 | static int do_sha1_hash(const char *buf, int buflen, unsigned char *md_value, |
331 | unsigned int *md_len) | |
41246cee | 332 | { |
fa2bb6ba SH |
333 | EVP_MD_CTX *mdctx; |
334 | const EVP_MD *md; | |
335 | ||
336 | md = EVP_get_digestbyname("sha1"); | |
f1258455 CB |
337 | if (!md) |
338 | return log_error(-1, "Unknown message digest: sha1\n"); | |
fa2bb6ba | 339 | |
b138bfcf | 340 | mdctx = EVP_MD_CTX_create(); |
fa2bb6ba SH |
341 | EVP_DigestInit_ex(mdctx, md, NULL); |
342 | EVP_DigestUpdate(mdctx, buf, buflen); | |
343 | EVP_DigestFinal_ex(mdctx, md_value, md_len); | |
b138bfcf | 344 | EVP_MD_CTX_destroy(mdctx); |
fa2bb6ba SH |
345 | |
346 | return 0; | |
41246cee DE |
347 | } |
348 | ||
7c3d3976 | 349 | int sha1sum_file(char *fnam, unsigned char *digest, unsigned int *md_len) |
3ce74686 | 350 | { |
f1258455 CB |
351 | __do_free char *buf = NULL; |
352 | __do_fclose FILE *f = NULL; | |
3ce74686 | 353 | int ret; |
3ce74686 SH |
354 | long flen; |
355 | ||
356 | if (!fnam) | |
357 | return -1; | |
b14fc100 | 358 | |
025ed0f3 | 359 | f = fopen_cloexec(fnam, "r"); |
f1258455 CB |
360 | if (!f) |
361 | return log_error_errno(-1, errno, "Failed to open template \"%s\"", fnam); | |
b14fc100 | 362 | |
f1258455 CB |
363 | if (fseek(f, 0, SEEK_END) < 0) |
364 | return log_error_errno(-1, errno, "Failed to seek to end of template"); | |
b14fc100 | 365 | |
f1258455 CB |
366 | flen = ftell(f); |
367 | if (flen < 0) | |
368 | return log_error_errno(-1, errno, "Failed to tell size of template"); | |
b14fc100 | 369 | |
f1258455 CB |
370 | if (fseek(f, 0, SEEK_SET) < 0) |
371 | return log_error_errno(-1, errno, "Failed to seek to start of template"); | |
b14fc100 | 372 | |
f1258455 CB |
373 | buf = malloc(flen + 1); |
374 | if (!buf) | |
375 | return log_error_errno(-1, ENOMEM, "Out of memory"); | |
b14fc100 | 376 | |
f1258455 CB |
377 | if (fread(buf, 1, flen, f) != flen) |
378 | return log_error_errno(-1, errno, "Failed to read template"); | |
b14fc100 | 379 | |
3ce74686 | 380 | buf[flen] = '\0'; |
fa2bb6ba | 381 | ret = do_sha1_hash(buf, flen, (void *)digest, md_len); |
3ce74686 SH |
382 | return ret; |
383 | } | |
384 | #endif | |
61a1d519 | 385 | |
8bd8018e | 386 | struct lxc_popen_FILE *lxc_popen(const char *command) |
ebec9176 | 387 | { |
3f323207 | 388 | int ret; |
ebec9176 AM |
389 | int pipe_fds[2]; |
390 | pid_t child_pid; | |
8bd8018e | 391 | struct lxc_popen_FILE *fp = NULL; |
ebec9176 | 392 | |
8bd8018e CB |
393 | ret = pipe2(pipe_fds, O_CLOEXEC); |
394 | if (ret < 0) | |
ebec9176 | 395 | return NULL; |
ebec9176 AM |
396 | |
397 | child_pid = fork(); | |
8bd8018e CB |
398 | if (child_pid < 0) |
399 | goto on_error; | |
400 | ||
401 | if (!child_pid) { | |
402 | sigset_t mask; | |
403 | ||
404 | close(pipe_fds[0]); | |
405 | ||
406 | /* duplicate stdout */ | |
407 | if (pipe_fds[1] != STDOUT_FILENO) | |
408 | ret = dup2(pipe_fds[1], STDOUT_FILENO); | |
409 | else | |
410 | ret = fcntl(pipe_fds[1], F_SETFD, 0); | |
411 | if (ret < 0) { | |
412 | close(pipe_fds[1]); | |
03f618af | 413 | _exit(EXIT_FAILURE); |
3f323207 CB |
414 | } |
415 | ||
8bd8018e CB |
416 | /* duplicate stderr */ |
417 | if (pipe_fds[1] != STDERR_FILENO) | |
418 | ret = dup2(pipe_fds[1], STDERR_FILENO); | |
419 | else | |
420 | ret = fcntl(pipe_fds[1], F_SETFD, 0); | |
421 | close(pipe_fds[1]); | |
422 | if (ret < 0) | |
03f618af | 423 | _exit(EXIT_FAILURE); |
8bd8018e CB |
424 | |
425 | /* unblock all signals */ | |
426 | ret = sigfillset(&mask); | |
427 | if (ret < 0) | |
03f618af | 428 | _exit(EXIT_FAILURE); |
8bd8018e | 429 | |
b467714b | 430 | ret = pthread_sigmask(SIG_UNBLOCK, &mask, NULL); |
8bd8018e | 431 | if (ret < 0) |
03f618af | 432 | _exit(EXIT_FAILURE); |
8bd8018e | 433 | |
ecfa5693 | 434 | /* check if /bin/sh exist, otherwise try Android location /system/bin/sh */ |
435 | if (file_exists("/bin/sh")) | |
436 | execl("/bin/sh", "sh", "-c", command, (char *)NULL); | |
437 | else | |
438 | execl("/system/bin/sh", "sh", "-c", command, (char *)NULL); | |
439 | ||
03f618af | 440 | _exit(127); |
ebec9176 AM |
441 | } |
442 | ||
8bd8018e CB |
443 | close(pipe_fds[1]); |
444 | pipe_fds[1] = -1; | |
ebec9176 | 445 | |
8bd8018e CB |
446 | fp = malloc(sizeof(*fp)); |
447 | if (!fp) | |
448 | goto on_error; | |
b14fc100 | 449 | |
7e50ec0b | 450 | memset(fp, 0, sizeof(*fp)); |
ebec9176 AM |
451 | |
452 | fp->child_pid = child_pid; | |
8bd8018e | 453 | fp->pipe = pipe_fds[0]; |
ebec9176 | 454 | |
7e50ec0b CB |
455 | /* From now on, closing fp->f will also close fp->pipe. So only ever |
456 | * call fclose(fp->f). | |
457 | */ | |
8bd8018e CB |
458 | fp->f = fdopen(pipe_fds[0], "r"); |
459 | if (!fp->f) | |
460 | goto on_error; | |
ebec9176 | 461 | |
8bd8018e | 462 | return fp; |
ebec9176 | 463 | |
8bd8018e | 464 | on_error: |
7e50ec0b CB |
465 | /* We can only close pipe_fds[0] if fdopen() didn't succeed or wasn't |
466 | * called yet. Otherwise the fd belongs to the file opened by fdopen() | |
467 | * since it isn't dup()ed. | |
468 | */ | |
469 | if (fp && !fp->f && pipe_fds[0] >= 0) | |
8bd8018e CB |
470 | close(pipe_fds[0]); |
471 | ||
472 | if (pipe_fds[1] >= 0) | |
473 | close(pipe_fds[1]); | |
ebec9176 | 474 | |
7e50ec0b CB |
475 | if (fp && fp->f) |
476 | fclose(fp->f); | |
477 | ||
478 | if (fp) | |
479 | free(fp); | |
480 | ||
ebec9176 AM |
481 | return NULL; |
482 | } | |
483 | ||
8bd8018e | 484 | int lxc_pclose(struct lxc_popen_FILE *fp) |
ebec9176 | 485 | { |
ebec9176 | 486 | pid_t wait_pid; |
8bd8018e | 487 | int wstatus = 0; |
ebec9176 | 488 | |
8bd8018e | 489 | if (!fp) |
ebec9176 | 490 | return -1; |
ebec9176 AM |
491 | |
492 | do { | |
8bd8018e CB |
493 | wait_pid = waitpid(fp->child_pid, &wstatus, 0); |
494 | } while (wait_pid < 0 && errno == EINTR); | |
ebec9176 | 495 | |
8bd8018e CB |
496 | fclose(fp->f); |
497 | free(fp); | |
498 | ||
499 | if (wait_pid < 0) | |
ebec9176 | 500 | return -1; |
ebec9176 AM |
501 | |
502 | return wstatus; | |
503 | } | |
504 | ||
508c263e SH |
505 | int randseed(bool srand_it) |
506 | { | |
4110345b | 507 | __do_fclose FILE *f = NULL; |
508c263e | 508 | /* |
7be6bcd5 | 509 | * srand pre-seed function based on /dev/urandom |
510 | */ | |
091045f8 | 511 | unsigned int seed = time(NULL) + getpid(); |
508c263e | 512 | |
4110345b | 513 | f = fopen("/dev/urandom", "re"); |
508c263e SH |
514 | if (f) { |
515 | int ret = fread(&seed, sizeof(seed), 1, f); | |
516 | if (ret != 1) | |
7be6bcd5 | 517 | SYSDEBUG("Unable to fread /dev/urandom, fallback to time+pid rand seed"); |
508c263e SH |
518 | } |
519 | ||
520 | if (srand_it) | |
521 | srand(seed); | |
522 | ||
523 | return seed; | |
524 | } | |
5d897655 SH |
525 | |
526 | uid_t get_ns_uid(uid_t orig) | |
527 | { | |
4110345b CB |
528 | __do_free char *line = NULL; |
529 | __do_fclose FILE *f = NULL; | |
5d897655 SH |
530 | size_t sz = 0; |
531 | uid_t nsid, hostid, range; | |
7be6bcd5 | 532 | |
4110345b | 533 | f = fopen("/proc/self/uid_map", "re"); |
f1258455 CB |
534 | if (!f) |
535 | return log_error_errno(0, errno, "Failed to open uid_map"); | |
5d897655 SH |
536 | |
537 | while (getline(&line, &sz, f) != -1) { | |
538 | if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3) | |
539 | continue; | |
b14fc100 | 540 | |
4110345b CB |
541 | if (hostid <= orig && hostid + range > orig) |
542 | return nsid += orig - hostid; | |
5d897655 SH |
543 | } |
544 | ||
4110345b | 545 | return LXC_INVALID_UID; |
b962868f CB |
546 | } |
547 | ||
548 | gid_t get_ns_gid(gid_t orig) | |
549 | { | |
4110345b CB |
550 | __do_free char *line = NULL; |
551 | __do_fclose FILE *f = NULL; | |
b962868f CB |
552 | size_t sz = 0; |
553 | gid_t nsid, hostid, range; | |
7be6bcd5 | 554 | |
4110345b | 555 | f = fopen("/proc/self/gid_map", "re"); |
f1258455 CB |
556 | if (!f) |
557 | return log_error_errno(0, errno, "Failed to open gid_map"); | |
b962868f CB |
558 | |
559 | while (getline(&line, &sz, f) != -1) { | |
560 | if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3) | |
561 | continue; | |
562 | ||
4110345b CB |
563 | if (hostid <= orig && hostid + range > orig) |
564 | return nsid += orig - hostid; | |
b962868f CB |
565 | } |
566 | ||
4110345b | 567 | return LXC_INVALID_GID; |
5d897655 | 568 | } |
c476bdce SH |
569 | |
/*
 * Return true if @path can be stat()ed and is a directory (symlinks are
 * followed). Any stat() failure, whatever the reason, counts as "no".
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode);
}
93c379f0 ÇO |
582 | |
/* 64-bit FNV-1a hash of @len bytes at @buf, continuing from @hval.
 *
 * SHA-1 is not used here so that this does not depend on an optional crypto
 * library (the SHA-1 helpers in this file are only built with HAVE_OPENSSL).
 * FNV has good anti collision properties and we're not worried about
 * pre-image resistance or one-way-ness, we're just trying to make the name
 * unique in the 108 bytes of space we have.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64
		 * (0x100000001b3 == 1 + 2^1 + 2^4 + 2^5 + 2^7 + 2^8 + 2^40)
		 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
2c6f3fc9 | 605 | |
f6310f18 | 606 | bool is_shared_mountpoint(const char *path) |
2c6f3fc9 | 607 | { |
c4382ee2 CB |
608 | __do_fclose FILE *f = NULL; |
609 | __do_free char *line = NULL; | |
2c6f3fc9 | 610 | int i; |
c4382ee2 | 611 | size_t len = 0; |
2c6f3fc9 | 612 | |
4110345b | 613 | f = fopen("/proc/self/mountinfo", "re"); |
2c6f3fc9 SH |
614 | if (!f) |
615 | return 0; | |
b14fc100 | 616 | |
c4382ee2 CB |
617 | while (getline(&line, &len, f) > 0) { |
618 | char *slider1, *slider2; | |
619 | ||
620 | for (slider1 = line, i = 0; slider1 && i < 4; i++) | |
621 | slider1 = strchr(slider1 + 1, ' '); | |
622 | ||
623 | if (!slider1) | |
2c6f3fc9 | 624 | continue; |
b14fc100 | 625 | |
c4382ee2 CB |
626 | slider2 = strchr(slider1 + 1, ' '); |
627 | if (!slider2) | |
2c6f3fc9 | 628 | continue; |
b14fc100 | 629 | |
c4382ee2 CB |
630 | *slider2 = '\0'; |
631 | if (strcmp(slider1 + 1, path) == 0) { | |
f6310f18 | 632 | /* This is the path. Is it shared? */ |
c4382ee2 CB |
633 | slider1 = strchr(slider2 + 1, ' '); |
634 | if (slider1 && strstr(slider1, "shared:")) | |
f6310f18 | 635 | return true; |
2c6f3fc9 SH |
636 | } |
637 | } | |
b14fc100 | 638 | |
f6310f18 LT |
639 | return false; |
640 | } | |
641 | ||
/*
 * Detect whether / is mounted MS_SHARED, by way of /proc/self/mountinfo.
 * Only '/' is checked. If the container rootfs or mount location is
 * MS_SHARED, but not '/', this will not notice.
 *
 * Returns 1 if / is a shared mountpoint, 0 otherwise.
 */
int detect_shared_rootfs(void)
{
	return is_shared_mountpoint("/") ? 1 : 0;
}
0e6e3a41 | 656 | |
37ef15bb CB |
657 | bool switch_to_ns(pid_t pid, const char *ns) |
658 | { | |
f62cf1d4 | 659 | __do_close int fd = -EBADF; |
b280bc38 CB |
660 | int ret; |
661 | char nspath[STRLITERALLEN("/proc//ns/") | |
662 | + INTTYPE_TO_STRLEN(pid_t) | |
663 | + LXC_NAMESPACE_NAME_MAX]; | |
51d0854c DY |
664 | |
665 | /* Switch to new ns */ | |
b280bc38 CB |
666 | ret = snprintf(nspath, sizeof(nspath), "/proc/%d/ns/%s", pid, ns); |
667 | if (ret < 0 || ret >= sizeof(nspath)) | |
51d0854c DY |
668 | return false; |
669 | ||
b280bc38 | 670 | fd = open(nspath, O_RDONLY | O_CLOEXEC); |
f1258455 CB |
671 | if (fd < 0) |
672 | return log_error_errno(false, errno, "Failed to open \"%s\"", nspath); | |
51d0854c DY |
673 | |
674 | ret = setns(fd, 0); | |
f1258455 CB |
675 | if (ret) |
676 | return log_error_errno(false, errno, "Failed to set process %d to \"%s\" of %d", pid, ns, fd); | |
b14fc100 | 677 | |
51d0854c DY |
678 | return true; |
679 | } | |
680 | ||
b7f954bb SH |
681 | /* |
682 | * looking at fs/proc_namespace.c, it appears we can | |
683 | * actually expect the rootfs entry to very specifically contain | |
684 | * " - rootfs rootfs " | |
685 | * IIUC, so long as we've chrooted so that rootfs is not our root, | |
686 | * the rootfs entry should always be skipped in mountinfo contents. | |
687 | */ | |
fa454c8e | 688 | bool detect_ramfs_rootfs(void) |
b7f954bb | 689 | { |
4110345b CB |
690 | __do_free char *line = NULL; |
691 | __do_free void *fopen_cache = NULL; | |
692 | __do_fclose FILE *f = NULL; | |
fa454c8e | 693 | size_t len = 0; |
b7f954bb | 694 | |
4110345b CB |
695 | f = fopen_cached("/proc/self/mountinfo", "re", &fopen_cache); |
696 | if (!f) | |
fa454c8e CB |
697 | return false; |
698 | ||
699 | while (getline(&line, &len, f) != -1) { | |
4110345b CB |
700 | int i; |
701 | char *p, *p2; | |
702 | ||
fa454c8e CB |
703 | for (p = line, i = 0; p && i < 4; i++) |
704 | p = strchr(p + 1, ' '); | |
b7f954bb SH |
705 | if (!p) |
706 | continue; | |
b14fc100 | 707 | |
fa454c8e | 708 | p2 = strchr(p + 1, ' '); |
b7f954bb SH |
709 | if (!p2) |
710 | continue; | |
711 | *p2 = '\0'; | |
fa454c8e | 712 | if (strcmp(p + 1, "/") == 0) { |
1a0e70ac | 713 | /* This is '/'. Is it the ramfs? */ |
fa454c8e | 714 | p = strchr(p2 + 1, '-'); |
4110345b | 715 | if (p && strncmp(p, "- rootfs rootfs ", 16) == 0) |
fa454c8e | 716 | return true; |
b7f954bb SH |
717 | } |
718 | } | |
b14fc100 | 719 | |
fa454c8e | 720 | return false; |
b7f954bb SH |
721 | } |
722 | ||
37ef15bb CB |
723 | char *on_path(const char *cmd, const char *rootfs) |
724 | { | |
f1258455 CB |
725 | __do_free char *path = NULL; |
726 | char *entry = NULL; | |
d726953a | 727 | char cmdpath[PATH_MAX]; |
0e6e3a41 SG |
728 | int ret; |
729 | ||
730 | path = getenv("PATH"); | |
731 | if (!path) | |
8afb3e61 | 732 | return NULL; |
0e6e3a41 SG |
733 | |
734 | path = strdup(path); | |
735 | if (!path) | |
8afb3e61 | 736 | return NULL; |
0e6e3a41 | 737 | |
f1258455 | 738 | lxc_iterate_parts(entry, path, ":") { |
9d9c111c | 739 | if (rootfs) |
d726953a | 740 | ret = snprintf(cmdpath, PATH_MAX, "%s/%s/%s", rootfs, |
37ef15bb | 741 | entry, cmd); |
9d9c111c | 742 | else |
d726953a CB |
743 | ret = snprintf(cmdpath, PATH_MAX, "%s/%s", entry, cmd); |
744 | if (ret < 0 || ret >= PATH_MAX) | |
84c5549b | 745 | continue; |
0e6e3a41 | 746 | |
f1258455 | 747 | if (access(cmdpath, X_OK) == 0) |
8afb3e61 | 748 | return strdup(cmdpath); |
0e6e3a41 SG |
749 | } |
750 | ||
8afb3e61 | 751 | return NULL; |
0e6e3a41 | 752 | } |
76a26f55 | 753 | |
12983ba4 SH |
/* Return true if /proc/self/ns/cgroup exists. */
bool cgns_supported(void)
{
	const char *cgroup_ns_path = "/proc/self/ns/cgroup";

	return file_exists(cgroup_ns_path);
}
758 | ||
9d9c111c SH |
759 | /* historically lxc-init has been under /usr/lib/lxc and under |
760 | * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc. | |
761 | */ | |
762 | char *choose_init(const char *rootfs) | |
763 | { | |
764 | char *retv = NULL; | |
370ec268 SF |
765 | const char *empty = "", |
766 | *tmp; | |
9d9c111c | 767 | int ret, env_set = 0; |
9d9c111c SH |
768 | |
769 | if (!getenv("PATH")) { | |
770 | if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0)) | |
771 | SYSERROR("Failed to setenv"); | |
b14fc100 | 772 | |
9d9c111c SH |
773 | env_set = 1; |
774 | } | |
775 | ||
776 | retv = on_path("init.lxc", rootfs); | |
777 | ||
7be6bcd5 | 778 | if (env_set) |
9d9c111c SH |
779 | if (unsetenv("PATH")) |
780 | SYSERROR("Failed to unsetenv"); | |
9d9c111c SH |
781 | |
782 | if (retv) | |
783 | return retv; | |
784 | ||
785 | retv = malloc(PATH_MAX); | |
786 | if (!retv) | |
787 | return NULL; | |
788 | ||
789 | if (rootfs) | |
370ec268 | 790 | tmp = rootfs; |
9d9c111c | 791 | else |
370ec268 SF |
792 | tmp = empty; |
793 | ||
794 | ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc"); | |
9d9c111c | 795 | if (ret < 0 || ret >= PATH_MAX) { |
7be6bcd5 | 796 | ERROR("The name of path is too long"); |
9d9c111c SH |
797 | goto out1; |
798 | } | |
b14fc100 | 799 | |
e57cd7e9 | 800 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
801 | return retv; |
802 | ||
370ec268 | 803 | ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init"); |
9d9c111c | 804 | if (ret < 0 || ret >= PATH_MAX) { |
7be6bcd5 | 805 | ERROR("The name of path is too long"); |
9d9c111c SH |
806 | goto out1; |
807 | } | |
b14fc100 | 808 | |
e57cd7e9 | 809 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
810 | return retv; |
811 | ||
370ec268 | 812 | ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp); |
9d9c111c | 813 | if (ret < 0 || ret >= PATH_MAX) { |
7be6bcd5 | 814 | ERROR("The name of path is too long"); |
9d9c111c SH |
815 | goto out1; |
816 | } | |
b14fc100 | 817 | |
e57cd7e9 | 818 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
819 | return retv; |
820 | ||
370ec268 | 821 | ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp); |
9d9c111c | 822 | if (ret < 0 || ret >= PATH_MAX) { |
7be6bcd5 | 823 | ERROR("The name of path is too long"); |
9d9c111c SH |
824 | goto out1; |
825 | } | |
b14fc100 | 826 | |
e57cd7e9 | 827 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
828 | return retv; |
829 | ||
830 | /* | |
831 | * Last resort, look for the statically compiled init.lxc which we | |
832 | * hopefully bind-mounted in. | |
833 | * If we are called during container setup, and we get to this point, | |
834 | * then the init.lxc.static from the host will need to be bind-mounted | |
835 | * in. So we return NULL here to indicate that. | |
836 | */ | |
837 | if (rootfs) | |
838 | goto out1; | |
839 | ||
840 | ret = snprintf(retv, PATH_MAX, "/init.lxc.static"); | |
841 | if (ret < 0 || ret >= PATH_MAX) { | |
842 | WARN("Nonsense - name /lxc.init.static too long"); | |
843 | goto out1; | |
844 | } | |
b14fc100 | 845 | |
e57cd7e9 | 846 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
847 | return retv; |
848 | ||
849 | out1: | |
850 | free(retv); | |
851 | return NULL; | |
852 | } | |
735f2c6e | 853 | |
6010a416 SG |
854 | /* |
855 | * Given the '-t' template option to lxc-create, figure out what to | |
856 | * do. If the template is a full executable path, use that. If it | |
857 | * is something like 'sshd', then return $templatepath/lxc-sshd. | |
858 | * On success return the template, on error return NULL. | |
859 | */ | |
860 | char *get_template_path(const char *t) | |
861 | { | |
862 | int ret, len; | |
863 | char *tpath; | |
864 | ||
b275efe3 RK |
865 | if (t[0] == '/') { |
866 | if (access(t, X_OK) == 0) { | |
867 | return strdup(t); | |
868 | } else { | |
869 | SYSERROR("Bad template pathname: %s", t); | |
870 | return NULL; | |
871 | } | |
6010a416 SG |
872 | } |
873 | ||
874 | len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1; | |
b14fc100 | 875 | |
6010a416 SG |
876 | tpath = malloc(len); |
877 | if (!tpath) | |
878 | return NULL; | |
b14fc100 | 879 | |
6010a416 SG |
880 | ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t); |
881 | if (ret < 0 || ret >= len) { | |
882 | free(tpath); | |
883 | return NULL; | |
884 | } | |
b14fc100 | 885 | |
6010a416 SG |
886 | if (access(tpath, X_OK) < 0) { |
887 | SYSERROR("bad template: %s", t); | |
888 | free(tpath); | |
889 | return NULL; | |
890 | } | |
891 | ||
892 | return tpath; | |
893 | } | |
0a4be28d | 894 | |
592fd47a SH |
/*
 * Step to the next segment of a path whose '/' were replaced with '\0'.
 *
 * @path:    the modified pathname buffer
 * @offsetp: offset of the segment seen last; updated to the offset of the
 *           next segment (or to the end) unless it was already at the end
 * @fulllen: full original path length
 *
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int pos = *offsetp;

	if (pos >= fulllen)
		return NULL;

	/* Skip the rest of the current segment ... */
	for (; pos < fulllen && path[pos] != '\0'; pos++)
		;

	/* ... and the separator(s) that follow it. */
	for (; pos < fulllen && path[pos] == '\0'; pos++)
		;

	*offsetp = pos;

	if (pos >= fulllen)
		return NULL;

	return path + pos;
}
919 | ||
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * @subdir counts as a subdir if it starts with @dir and the match ends on a
 * path component boundary: @dir ends in '/', @subdir continues with '/', or
 * both strings are equal. Returns true in that case, false otherwise.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	if (subdirlen < len)
		return false;

	if (strncmp(subdir, dir, len) != 0)
		return false;

	/* Guard len == 0 so that dir[len - 1] never reads before @dir. */
	if (len > 0 && dir[len - 1] == '/')
		return true;

	if (subdir[len] == '/' || subdirlen == len)
		return true;

	return false;
}
942 | ||
/*
 * Tell whether the object behind an open fd is a symbolic link.
 *
 * Returns 0 if it is not, -ELOOP if it is, and -ENOENT if fstat() on the fd
 * failed.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
961 | ||
/*
 * Open @nextpath relative to @dirfd, refusing to follow a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns the new fd on success. On failure returns -1 (errno set by
 * openat()) or the negative value reported by check_symlink().
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int fd, err;

	fd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
	if (fd >= 0) /* Was not a symlink, all good. */
		return fd;

	/* Everything but a permission problem (ELOOP included) is final. */
	if (errno != EPERM && errno != EACCES)
		return fd;

	/* We're not root (cause we got EPERM) so try opening with O_PATH. */
	fd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (fd < 0)
		return fd;

	/* O_PATH hands out an fd even for a symlink, so the fd has to be
	 * inspected explicitly before it can be trusted.
	 */
	err = check_symlink(fd);
	if (err < 0) {
		close(fd);
		return err;
	}

	return fd;
}
997 | ||
/*
 * Open a path intended as a mount point, walking it one component at a time
 * so that no component below @prefix_skip may be a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target:      path to be opened
 * @prefix_skip: leading part of @target in which symbolic links are ignored
 *               (this would be the container's rootfs). NULL or "" selects
 *               "/" as the starting directory.
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int offset = 0, dirfd, fulllen;
	char *copy, *component, *p;

	fulllen = strlen(target);

	/* make sure prefix-skip makes sense */
	if (prefix_skip && strlen(prefix_skip) > 0) {
		offset = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, offset)) {
			ERROR("WHOA there - target \"%s\" didn't start with prefix \"%s\"",
			      target, prefix_skip);
			return -EINVAL;
		}

		/*
		 * get_nextpath() expects the offset to sit on a (turned into
		 * \0) '/' or before it, so step back by one to make sure that
		 * is the case.
		 */
		if (offset)
			offset--;
	} else {
		prefix_skip = "/";
		offset = 0;
	}

	/* Work on a private copy that can be chopped into components. */
	copy = strdup(target);
	if (!copy) {
		ERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}

	for (p = copy; p < copy + fulllen; p++) {
		if (*p == '/')
			*p = '\0';
	}

	dirfd = open(prefix_skip, O_RDONLY);
	if (dirfd < 0) {
		SYSERROR("Failed to open path \"%s\"", prefix_skip);
		free(copy);
		return dirfd;
	}

	/* Descend one component at a time, trading the parent fd for the
	 * child fd on every step.
	 */
	while ((component = get_nextpath(copy, &offset, fulllen)) != NULL) {
		int nextfd, open_errno;

		nextfd = open_if_safe(dirfd, component);
		open_errno = errno;
		close(dirfd);

		dirfd = nextfd;
		if (nextfd >= 0)
			continue;

		errno = open_errno;
		if (errno == ELOOP)
			SYSERROR("%s in %s was a symbolic link!", component, target);

		break;
	}

	free(copy);
	return dirfd;
}
1081 | ||
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * The target (and, for relative bind mounts, the source) is first opened
 * with open_without_symlink(); mount() is then pointed at the matching
 * /proc/self/fd/<fd> entry instead of at the original path.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	const char *mntsrc = src;
	int srcfd = -1;
	int destfd, len, mount_errno, ret;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("This is a relative bind mount");

		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		len = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (len < 0 || len >= (int)sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}

		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd >= 0) {
			/* Keep the errno of the failed open for the caller. */
			int open_errno = errno;

			close(srcfd);
			errno = open_errno;
		}

		return destfd;
	}

	len = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (len < 0 || len >= (int)sizeof(destbuf)) {
		if (srcfd >= 0)
			close(srcfd);

		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	mount_errno = errno;

	if (srcfd >= 0)
		close(srcfd);

	close(destfd);

	if (ret >= 0)
		return 0;

	errno = mount_errno;
	SYSERROR("Failed to mount \"%s\" onto \"%s\"", src ? src : "(null)", dest);
	return ret;
}
1154 | ||
ced03a01 SH |
1155 | /* |
1156 | * Mount a proc under @rootfs if proc self points to a pid other than | |
1157 | * my own. This is needed to have a known-good proc mount for setting | |
1158 | * up LSMs both at container startup and attach. | |
1159 | * | |
1160 | * @rootfs : the rootfs where proc should be mounted | |
1161 | * | |
1162 | * Returns < 0 on failure, 0 if the correct proc was already mounted | |
1163 | * and 1 if a new proc was mounted. | |
f267d666 BP |
1164 | * |
1165 | * NOTE: not to be called from inside the container namespace! | |
ced03a01 | 1166 | */ |
943144d9 | 1167 | int lxc_mount_proc_if_needed(const char *rootfs) |
ced03a01 | 1168 | { |
7be6bcd5 | 1169 | char path[PATH_MAX] = {0}; |
6b1ba5d6 | 1170 | int link_to_pid, linklen, mypid, ret; |
40464e8a | 1171 | char link[INTTYPE_TO_STRLEN(pid_t)] = {0}; |
ced03a01 | 1172 | |
d726953a CB |
1173 | ret = snprintf(path, PATH_MAX, "%s/proc/self", rootfs); |
1174 | if (ret < 0 || ret >= PATH_MAX) { | |
7be6bcd5 | 1175 | SYSERROR("The name of proc path is too long"); |
ced03a01 SH |
1176 | return -1; |
1177 | } | |
fc2ad9dc | 1178 | |
979a0d93 | 1179 | linklen = readlink(path, link, sizeof(link)); |
fc2ad9dc | 1180 | |
d726953a CB |
1181 | ret = snprintf(path, PATH_MAX, "%s/proc", rootfs); |
1182 | if (ret < 0 || ret >= PATH_MAX) { | |
7be6bcd5 | 1183 | SYSERROR("The name of proc path is too long"); |
d539a2b2 CB |
1184 | return -1; |
1185 | } | |
fc2ad9dc CB |
1186 | |
1187 | /* /proc not mounted */ | |
1188 | if (linklen < 0) { | |
1189 | if (mkdir(path, 0755) && errno != EEXIST) | |
1190 | return -1; | |
b14fc100 | 1191 | |
ced03a01 | 1192 | goto domount; |
979a0d93 | 1193 | } else if (linklen >= sizeof(link)) { |
6b1ba5d6 | 1194 | link[linklen - 1] = '\0'; |
7be6bcd5 | 1195 | ERROR("Readlink returned truncated content: \"%s\"", link); |
6b1ba5d6 | 1196 | return -1; |
fc2ad9dc CB |
1197 | } |
1198 | ||
0059379f | 1199 | mypid = lxc_raw_getpid(); |
6b1ba5d6 CB |
1200 | INFO("I am %d, /proc/self points to \"%s\"", mypid, link); |
1201 | ||
2d036cca CB |
1202 | if (lxc_safe_int(link, &link_to_pid) < 0) |
1203 | return -1; | |
fc2ad9dc | 1204 | |
6b1ba5d6 CB |
1205 | /* correct procfs is already mounted */ |
1206 | if (link_to_pid == mypid) | |
1207 | return 0; | |
fc2ad9dc | 1208 | |
6b1ba5d6 CB |
1209 | ret = umount2(path, MNT_DETACH); |
1210 | if (ret < 0) | |
7be6bcd5 | 1211 | SYSWARN("Failed to umount \"%s\" with MNT_DETACH", path); |
ced03a01 SH |
1212 | |
1213 | domount: | |
fc2ad9dc | 1214 | /* rootfs is NULL */ |
6b1ba5d6 | 1215 | if (!strcmp(rootfs, "")) |
f267d666 BP |
1216 | ret = mount("proc", path, "proc", 0, NULL); |
1217 | else | |
1218 | ret = safe_mount("proc", path, "proc", 0, NULL, rootfs); | |
f267d666 | 1219 | if (ret < 0) |
ced03a01 | 1220 | return -1; |
f267d666 | 1221 | |
7be6bcd5 | 1222 | INFO("Mounted /proc in container for security transition"); |
ced03a01 SH |
1223 | return 1; |
1224 | } | |
69aeabac | 1225 | |
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the new fd, or the negative value returned by open() after
 * logging the failure.
 */
int open_devnull(void)
{
	int nullfd;

	nullfd = open("/dev/null", O_RDWR);
	if (nullfd >= 0)
		return nullfd;

	SYSERROR("Can't open /dev/null");
	return nullfd;
}
69aeabac | 1234 | |
f8dd0275 AM |
/*
 * Make stdin, stdout and stderr (in that order) duplicates of @fd.
 *
 * Returns 0 on success, -1 if @fd is negative or a dup2() call fails.
 * Descriptors redirected before a failing dup2() stay redirected.
 */
int set_stdfds(int fd)
{
	static const int std_targets[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t idx;

	if (fd < 0)
		return -1;

	for (idx = 0; idx < sizeof(std_targets) / sizeof(std_targets[0]); idx++) {
		if (dup2(fd, std_targets[idx]) < 0)
			return -1;
	}

	return 0;
}
1256 | ||
/*
 * Redirect stdin, stdout and stderr to /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null could not be
 * opened.
 */
int null_stdfds(void)
{
	int nullfd, ret;

	nullfd = open_devnull();
	if (nullfd < 0)
		return -1;

	ret = set_stdfds(nullfd);
	close(nullfd);

	return ret;
}
ccb4cabe | 1270 | |
330ae3d3 | 1271 | /* Check whether a signal is blocked by a process. */ |
de3c491b | 1272 | /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */ |
40464e8a | 1273 | #define __PROC_STATUS_LEN (6 + INTTYPE_TO_STRLEN(pid_t) + 7 + 1) |
573ad77f | 1274 | bool task_blocks_signal(pid_t pid, int signal) |
330ae3d3 | 1275 | { |
4110345b CB |
1276 | __do_free char *line = NULL; |
1277 | __do_fclose FILE *f = NULL; | |
330ae3d3 | 1278 | int ret; |
7be6bcd5 | 1279 | char status[__PROC_STATUS_LEN] = {0}; |
573ad77f | 1280 | uint64_t sigblk = 0, one = 1; |
eabf1ea9 CB |
1281 | size_t n = 0; |
1282 | bool bret = false; | |
330ae3d3 | 1283 | |
de3c491b CB |
1284 | ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid); |
1285 | if (ret < 0 || ret >= __PROC_STATUS_LEN) | |
330ae3d3 CB |
1286 | return bret; |
1287 | ||
4110345b | 1288 | f = fopen(status, "re"); |
330ae3d3 | 1289 | if (!f) |
4110345b | 1290 | return false; |
330ae3d3 CB |
1291 | |
1292 | while (getline(&line, &n, f) != -1) { | |
573ad77f CB |
1293 | char *numstr; |
1294 | ||
eabf1ea9 | 1295 | if (strncmp(line, "SigBlk:", 7)) |
6fbcbe3b CB |
1296 | continue; |
1297 | ||
573ad77f CB |
1298 | numstr = lxc_trim_whitespace_in_place(line + 7); |
1299 | ret = lxc_safe_uint64(numstr, &sigblk, 16); | |
1300 | if (ret < 0) | |
4110345b | 1301 | return false; |
573ad77f CB |
1302 | |
1303 | break; | |
330ae3d3 CB |
1304 | } |
1305 | ||
573ad77f | 1306 | if (sigblk & (one << (signal - 1))) |
330ae3d3 CB |
1307 | bret = true; |
1308 | ||
330ae3d3 CB |
1309 | return bret; |
1310 | } | |
000dfda7 | 1311 | |
a687256f CB |
/*
 * Open /proc/<pid>/ns/<ns> read-only with close-on-exec set.
 *
 * When @ns is NULL or the empty string the /proc/<pid>/ns directory itself
 * is opened; this way the function can also be used to check whether
 * namespaces are supported by the kernel.
 *
 * Returns the fd from open(), or -1 with errno set to EFBIG when the path
 * does not fit into the local buffer.
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char path[__NS_PATH_LEN];
	const char *sep = "";
	const char *name = "";
	int len;

	if (ns && strcmp(ns, "") != 0) {
		sep = "/";
		name = ns;
	}

	len = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid, sep, name);
	if (len < 0 || (size_t)len >= __NS_PATH_LEN) {
		errno = EFBIG;
		return -1;
	}

	return open(path, O_RDONLY | O_CLOEXEC);
}
6bc2eafe | 1333 | |
464c4611 | 1334 | bool lxc_switch_uid_gid(uid_t uid, gid_t gid) |
dbaf55a3 | 1335 | { |
db2d1af1 CB |
1336 | int ret = 0; |
1337 | ||
1338 | if (gid != LXC_INVALID_GID) { | |
1339 | ret = setgid(gid); | |
1340 | if (ret < 0) { | |
1341 | SYSERROR("Failed to switch to gid %d", gid); | |
464c4611 | 1342 | return false; |
db2d1af1 CB |
1343 | } |
1344 | NOTICE("Switched to gid %d", gid); | |
dbaf55a3 | 1345 | } |
dbaf55a3 | 1346 | |
db2d1af1 CB |
1347 | if (uid != LXC_INVALID_UID) { |
1348 | ret = setuid(uid); | |
1349 | if (ret < 0) { | |
1350 | SYSERROR("Failed to switch to uid %d", uid); | |
464c4611 | 1351 | return false; |
db2d1af1 CB |
1352 | } |
1353 | NOTICE("Switched to uid %d", uid); | |
dbaf55a3 | 1354 | } |
dbaf55a3 | 1355 | |
464c4611 | 1356 | return true; |
dbaf55a3 CB |
1357 | } |
1358 | ||
/*
 * Thin wrapper around setgroups() that logs the outcome in a uniform way.
 *
 * Returns true on success, false if setgroups() failed.
 */
bool lxc_setgroups(int size, gid_t list[])
{
	int ret;

	ret = setgroups(size, list);
	if (ret >= 0) {
		NOTICE("Dropped additional groups");
		return true;
	}

	SYSERROR("Failed to setgroups()");
	return false;
}
c6868a1f CB |
1370 | |
1371 | static int lxc_get_unused_loop_dev_legacy(char *loop_name) | |
1372 | { | |
1373 | struct dirent *dp; | |
1374 | struct loop_info64 lo64; | |
1375 | DIR *dir; | |
1376 | int dfd = -1, fd = -1, ret = -1; | |
1377 | ||
1378 | dir = opendir("/dev"); | |
2f32e37e | 1379 | if (!dir) { |
1380 | SYSERROR("Failed to open \"/dev\""); | |
c6868a1f | 1381 | return -1; |
2f32e37e | 1382 | } |
c6868a1f CB |
1383 | |
1384 | while ((dp = readdir(dir))) { | |
c6868a1f CB |
1385 | if (strncmp(dp->d_name, "loop", 4) != 0) |
1386 | continue; | |
1387 | ||
1388 | dfd = dirfd(dir); | |
1389 | if (dfd < 0) | |
1390 | continue; | |
1391 | ||
1392 | fd = openat(dfd, dp->d_name, O_RDWR); | |
1393 | if (fd < 0) | |
1394 | continue; | |
1395 | ||
1396 | ret = ioctl(fd, LOOP_GET_STATUS64, &lo64); | |
1397 | if (ret < 0) { | |
1398 | if (ioctl(fd, LOOP_GET_STATUS64, &lo64) == 0 || | |
1399 | errno != ENXIO) { | |
1400 | close(fd); | |
1401 | fd = -1; | |
1402 | continue; | |
1403 | } | |
1404 | } | |
1405 | ||
1406 | ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name); | |
1407 | if (ret < 0 || ret >= LO_NAME_SIZE) { | |
1408 | close(fd); | |
1409 | fd = -1; | |
1410 | continue; | |
1411 | } | |
1412 | ||
1413 | break; | |
1414 | } | |
1415 | ||
1416 | closedir(dir); | |
1417 | ||
1418 | if (fd < 0) | |
1419 | return -1; | |
1420 | ||
1421 | return fd; | |
1422 | } | |
1423 | ||
1424 | static int lxc_get_unused_loop_dev(char *name_loop) | |
1425 | { | |
1426 | int loop_nr, ret; | |
1427 | int fd_ctl = -1, fd_tmp = -1; | |
1428 | ||
1429 | fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC); | |
2f32e37e | 1430 | if (fd_ctl < 0) { |
1431 | SYSERROR("Failed to open loop control"); | |
c6868a1f | 1432 | return -ENODEV; |
2f32e37e | 1433 | } |
c6868a1f CB |
1434 | |
1435 | loop_nr = ioctl(fd_ctl, LOOP_CTL_GET_FREE); | |
2f32e37e | 1436 | if (loop_nr < 0) { |
1437 | SYSERROR("Failed to get loop control"); | |
c6868a1f | 1438 | goto on_error; |
2f32e37e | 1439 | } |
c6868a1f CB |
1440 | |
1441 | ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", loop_nr); | |
1442 | if (ret < 0 || ret >= LO_NAME_SIZE) | |
1443 | goto on_error; | |
1444 | ||
1445 | fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); | |
b11738d7 | 1446 | if (fd_tmp < 0) { |
1447 | /* on Android loop devices are moved under /dev/block, give it a shot */ | |
1448 | ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/block/loop%d", loop_nr); | |
1449 | if (ret < 0 || ret >= LO_NAME_SIZE) | |
1450 | goto on_error; | |
1451 | ||
1452 | fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); | |
1453 | if (fd_tmp < 0) | |
1454 | SYSERROR("Failed to open loop \"%s\"", name_loop); | |
1455 | } | |
c6868a1f CB |
1456 | |
1457 | on_error: | |
1458 | close(fd_ctl); | |
1459 | return fd_tmp; | |
1460 | } | |
1461 | ||
1462 | int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags) | |
1463 | { | |
1464 | int ret; | |
1465 | struct loop_info64 lo64; | |
1466 | int fd_img = -1, fret = -1, fd_loop = -1; | |
1467 | ||
1468 | fd_loop = lxc_get_unused_loop_dev(loop_dev); | |
1469 | if (fd_loop < 0) { | |
2f32e37e | 1470 | if (fd_loop != -ENODEV) |
1471 | goto on_error; | |
1472 | ||
1473 | fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev); | |
1474 | if (fd_loop < 0) | |
c6868a1f CB |
1475 | goto on_error; |
1476 | } | |
1477 | ||
1478 | fd_img = open(source, O_RDWR | O_CLOEXEC); | |
2f32e37e | 1479 | if (fd_img < 0) { |
1480 | SYSERROR("Failed to open source \"%s\"", source); | |
c6868a1f | 1481 | goto on_error; |
2f32e37e | 1482 | } |
c6868a1f CB |
1483 | |
1484 | ret = ioctl(fd_loop, LOOP_SET_FD, fd_img); | |
2f32e37e | 1485 | if (ret < 0) { |
1486 | SYSERROR("Failed to set loop fd"); | |
c6868a1f | 1487 | goto on_error; |
2f32e37e | 1488 | } |
c6868a1f CB |
1489 | |
1490 | memset(&lo64, 0, sizeof(lo64)); | |
1491 | lo64.lo_flags = flags; | |
1492 | ||
a70c9e85 JF |
1493 | strlcpy((char *)lo64.lo_file_name, source, LO_NAME_SIZE); |
1494 | ||
c6868a1f | 1495 | ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64); |
2f32e37e | 1496 | if (ret < 0) { |
1497 | SYSERROR("Failed to set loop status64"); | |
c6868a1f | 1498 | goto on_error; |
2f32e37e | 1499 | } |
c6868a1f CB |
1500 | |
1501 | fret = 0; | |
1502 | ||
1503 | on_error: | |
1504 | if (fd_img >= 0) | |
1505 | close(fd_img); | |
1506 | ||
1507 | if (fret < 0 && fd_loop >= 0) { | |
1508 | close(fd_loop); | |
1509 | fd_loop = -1; | |
1510 | } | |
1511 | ||
1512 | return fd_loop; | |
1513 | } | |
74251e49 CB |
1514 | |
/*
 * Unmount everything that is stacked on @path.
 *
 * @path: mountpoint to peel.
 * @lazy: use MNT_DETACH for every umount2() call when true.
 *
 * Returns the number of successful unmounts (capped at INT_MAX), or -errno
 * as soon as umount2() fails with anything other than EINVAL.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	const int umount_flags = lazy ? MNT_DETACH : 0;
	int peeled = 0;

	for (;;) {
		if (umount2(path, umount_flags) < 0) {
			/* We consider anything else than EINVAL deadly to
			 * prevent going into an infinite loop. (The other
			 * alternative is constantly parsing
			 * /proc/self/mountinfo which is yucky and probably
			 * racy.)
			 */
			if (errno != EINVAL)
				return -errno;

			break;
		}

		/* Stop counting instead of overflowing; a stack that deep is
		 * not worth reporting precisely.
		 */
		if (peeled != INT_MAX)
			peeled++;

		/* The umount worked, so go around again in case another
		 * mount is stacked underneath.
		 */
	}

	return peeled;
}
ea3a694f | 1546 | |
/*
 * Run @child_fn(@args) in a child process and capture its output.
 *
 * @buf:         optional buffer receiving what a single read from the
 *               child's stdout/stderr pipe returned; the last byte read is
 *               replaced by the terminating '\0'.
 * @buf_size:    size of @buf.
 * @child_fn:    function executed in the child; it is expected not to return.
 * @args:        argument handed to @child_fn.
 * @wait_status: selects lxc_wait_for_pid_status() (true) or wait_for_pid()
 *               (false) for reaping the child.
 *
 * Returns -1 if the pipe or the child could not be created, otherwise the
 * value of the selected wait helper.
 */
int run_command_internal(char *buf, size_t buf_size, int (*child_fn)(void *), void *args, bool wait_status)
{
	int pipefd[2];
	int dup_ret, wait_ret;
	pid_t pid;
	ssize_t nread;

	/* Make sure our callers do not receive uninitialized memory. */
	if (buf && buf_size > 0)
		buf[0] = '\0';

	if (pipe(pipefd) < 0) {
		SYSERROR("Failed to create pipe");
		return -1;
	}

	pid = lxc_raw_clone(0, NULL);
	if (pid < 0) {
		close(pipefd[0]);
		close(pipefd[1]);
		SYSERROR("Failed to create new process");
		return -1;
	}

	if (pid == 0) {
		/* Child: the read end is of no use here. */
		close(pipefd[0]);

		/* Point stdout and stderr at the write end of the pipe. */
		dup_ret = dup2(pipefd[1], STDOUT_FILENO);
		if (dup_ret >= 0)
			dup_ret = dup2(pipefd[1], STDERR_FILENO);

		/* The original write end is no longer needed. */
		close(pipefd[1]);

		if (dup_ret < 0) {
			SYSERROR("Failed to duplicate std{err,out} file descriptor");
			_exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("Failed to exec command");
		_exit(EXIT_FAILURE);
	}

	/* Parent: only the read end is needed. */
	close(pipefd[1]);

	if (buf && buf_size > 0) {
		nread = lxc_read_nointr(pipefd[0], buf, buf_size - 1);
		if (nread > 0)
			buf[nread - 1] = '\0';
	}

	wait_ret = wait_status ? lxc_wait_for_pid_status(pid) : wait_for_pid(pid);

	/* Done with the read end as well. */
	close(pipefd[0]);

	return wait_ret;
}
04ad7ffe | 1614 | |
/* Run @child_fn in a child and reap it with wait_for_pid(); see
 * run_command_internal() for the meaning of the arguments.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	const bool want_status = false;

	return run_command_internal(buf, buf_size, child_fn, args, want_status);
}
1619 | ||
/* Run @child_fn in a child and reap it with lxc_wait_for_pid_status(); see
 * run_command_internal() for the meaning of the arguments.
 */
int run_command_status(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	const bool want_status = true;

	return run_command_internal(buf, buf_size, child_fn, args, want_status);
}
1624 | ||
d75c14e2 CB |
/*
 * Check whether a network interface is known to the kernel by looking for
 * /sys/class/net/<nic>.
 *
 * The special name "none" is always reported as existing. A name that does
 * not fit into the path buffer is reported as missing.
 */
bool lxc_nic_exists(char *nic)
{
/* strlen("/sys/class/net/") + interface name + \0; parenthesised so that the
 * macro expands safely inside any expression.
 */
#define __LXC_SYS_CLASS_NET_LEN (15 + IFNAMSIZ + 1)
	char path[__LXC_SYS_CLASS_NET_LEN];
	int ret;
	struct stat sb;

	if (!strcmp(nic, "none"))
		return true;

	ret = snprintf(path, sizeof(path), "/sys/class/net/%s", nic);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return false;

	ret = stat(path, &sb);
	if (ret < 0)
		return false;

	return true;
}
127c6e70 | 1645 | |
6222c3f4 CB |
/*
 * Round @n up to a power of two.
 *
 * Returns @n itself if it already is a power of two, otherwise twice the
 * value of its highest set bit. 0 is not valid input and is answered with 0,
 * which is not a valid power of two.
 */
uint64_t lxc_find_next_power2(uint64_t n)
{
	uint64_t top;

	if (n == 0)
		return 0;

	/* Clear the lowest set bit until only the highest one is left. */
	for (top = n; top & (top - 1); top &= top - 1)
		;

	/* Nothing was cleared: n already was a power of two. */
	if (top == n)
		return n;

	return top << 1;
}
1fd0f41e | 1663 | |
4d8bdfa0 CB |
1664 | static int process_dead(/* takes */ int status_fd) |
1665 | { | |
f62cf1d4 | 1666 | __do_close int dupfd = -EBADF; |
4d8bdfa0 CB |
1667 | __do_free char *line = NULL; |
1668 | __do_fclose FILE *f = NULL; | |
1669 | int ret = 0; | |
1670 | size_t n = 0; | |
1671 | ||
1672 | dupfd = dup(status_fd); | |
1673 | if (dupfd < 0) | |
1674 | return -1; | |
1675 | ||
1676 | if (fd_cloexec(dupfd, true) < 0) | |
1677 | return -1; | |
1678 | ||
92bdc593 | 1679 | f = fdopen(dupfd, "re"); |
4d8bdfa0 CB |
1680 | if (!f) |
1681 | return -1; | |
4110345b CB |
1682 | |
1683 | /* Transfer ownership of fd. */ | |
92bdc593 | 1684 | move_fd(dupfd); |
4d8bdfa0 CB |
1685 | |
1686 | ret = 0; | |
1687 | while (getline(&line, &n, f) != -1) { | |
1688 | char *state; | |
1689 | ||
1690 | if (strncmp(line, "State:", 6)) | |
1691 | continue; | |
1692 | ||
1693 | state = lxc_trim_whitespace_in_place(line + 6); | |
1694 | /* only check whether process is dead or zombie for now */ | |
1695 | if (*state == 'X' || *state == 'Z') | |
1696 | ret = 1; | |
1697 | } | |
1698 | ||
1699 | return ret; | |
1700 | } | |
1701 | ||
/*
 * Request @signal to be delivered when the parent dies, and kill ourselves
 * right away if the parent is already gone.
 *
 * @signal:           signal passed to prctl(PR_SET_PDEATHSIG).
 * @parent:           pid the caller expects getppid() to report.
 * @parent_status_fd: fd of the parent's status file; consulted only when
 *                    getppid() reports 0. May be negative to skip that check.
 *
 * Returns 0 on success, -1 if prctl() failed, or the result of
 * raise(SIGKILL) when the parent turned out to be gone.
 */
int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd)
{
	int prctl_ret;
	pid_t cur_parent;

	prctl_ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0),
			  prctl_arg(0), prctl_arg(0));

	/* verify that we haven't been orphaned in the meantime */
	cur_parent = (pid_t)syscall(SYS_getppid);
	if (cur_parent != 0) {
		if (cur_parent != parent)
			return raise(SIGKILL);
	} else {
		/* parent outside our pidns */
		if (parent_status_fd < 0)
			return 0;

		if (process_dead(parent_status_fd) == 1)
			return raise(SIGKILL);
	}

	return prctl_ret < 0 ? -1 : 0;
}
7ad37670 | 1727 | |
a9d4ebc1 CB |
/*
 * Set or clear the close-on-exec flag of @fd.
 *
 * @fd:      file descriptor to modify.
 * @cloexec: true to set FD_CLOEXEC, false to clear it.
 *
 * Returns 0 on success (including when the flag already had the requested
 * value) and -errno if fcntl() failed.
 */
int fd_cloexec(int fd, bool cloexec)
{
	int current, wanted;

	current = fcntl(fd, F_GETFD, 0);
	if (current < 0)
		return -errno;

	wanted = cloexec ? (current | FD_CLOEXEC) : (current & ~FD_CLOEXEC);

	/* Only touch the descriptor when the flags actually change. */
	if (wanted != current && fcntl(fd, F_SETFD, wanted) < 0)
		return -errno;

	return 0;
}
d7ab0375 | 1749 | |
8408a9cc | 1750 | int lxc_rm_rf(const char *dirname) |
d7ab0375 | 1751 | { |
8e64b673 CB |
1752 | __do_closedir DIR *dir = NULL; |
1753 | int fret = 0; | |
d7ab0375 | 1754 | int ret; |
1755 | struct dirent *direntp; | |
d7ab0375 | 1756 | |
1757 | dir = opendir(dirname); | |
8e64b673 CB |
1758 | if (!dir) |
1759 | return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname); | |
d7ab0375 | 1760 | |
1761 | while ((direntp = readdir(dir))) { | |
8e64b673 | 1762 | __do_free char *pathname = NULL; |
d7ab0375 | 1763 | struct stat mystat; |
1764 | ||
1765 | if (!strcmp(direntp->d_name, ".") || | |
1766 | !strcmp(direntp->d_name, "..")) | |
1767 | continue; | |
1768 | ||
1769 | pathname = must_make_path(dirname, direntp->d_name, NULL); | |
d7ab0375 | 1770 | ret = lstat(pathname, &mystat); |
1771 | if (ret < 0) { | |
8e64b673 | 1772 | if (!fret) |
7be6bcd5 | 1773 | SYSWARN("Failed to stat \"%s\"", pathname); |
d7ab0375 | 1774 | |
8e64b673 CB |
1775 | fret = -1; |
1776 | continue; | |
d7ab0375 | 1777 | } |
1778 | ||
1779 | if (!S_ISDIR(mystat.st_mode)) | |
8e64b673 | 1780 | continue; |
d7ab0375 | 1781 | |
8408a9cc | 1782 | ret = lxc_rm_rf(pathname); |
d7ab0375 | 1783 | if (ret < 0) |
8e64b673 | 1784 | fret = -1; |
d7ab0375 | 1785 | } |
1786 | ||
1787 | ret = rmdir(dirname); | |
8e64b673 CB |
1788 | if (ret < 0) |
1789 | return log_warn_errno(-1, errno, "Failed to delete \"%s\"", dirname); | |
d7ab0375 | 1790 | |
8e64b673 | 1791 | return fret; |
d7ab0375 | 1792 | } |
b25291da | 1793 | |
4fef78bc | 1794 | int lxc_setup_keyring(char *keyring_label) |
b25291da CB |
1795 | { |
1796 | key_serial_t keyring; | |
1797 | int ret = 0; | |
1798 | ||
4fef78bc MB |
1799 | if (keyring_label) { |
1800 | if (lsm_keyring_label_set(keyring_label) < 0) { | |
1801 | ERROR("Couldn't set keyring label"); | |
1802 | } | |
1803 | } | |
1804 | ||
b25291da CB |
1805 | /* Try to allocate a new session keyring for the container to prevent |
1806 | * information leaks. | |
1807 | */ | |
1808 | keyring = keyctl(KEYCTL_JOIN_SESSION_KEYRING, prctl_arg(0), | |
1809 | prctl_arg(0), prctl_arg(0), prctl_arg(0)); | |
1810 | if (keyring < 0) { | |
1811 | switch (errno) { | |
1812 | case ENOSYS: | |
1813 | DEBUG("The keyctl() syscall is not supported or blocked"); | |
1814 | break; | |
1815 | case EACCES: | |
1816 | __fallthrough; | |
1817 | case EPERM: | |
1818 | DEBUG("Failed to access kernel keyring. Continuing..."); | |
1819 | break; | |
1820 | default: | |
1821 | SYSERROR("Failed to create kernel keyring"); | |
b25291da CB |
1822 | break; |
1823 | } | |
1824 | } | |
1825 | ||
1826 | return ret; | |
1827 | } | |
39293f22 CB |
1828 | |
1829 | bool lxc_can_use_pidfd(int pidfd) | |
1830 | { | |
1831 | int ret; | |
1832 | ||
1833 | if (pidfd < 0) | |
1834 | return log_error(false, "Kernel does not support pidfds"); | |
1835 | ||
39293f22 CB |
1836 | /* |
1837 | * We don't care whether or not children were in a waitable state. We | |
1838 | * just care whether waitid() recognizes P_PIDFD. | |
1839 | * | |
1840 | * Btw, while I have your attention, the above waitid() code is an | |
1841 | * excellent example of how _not_ to do flag-based kernel APIs. So if | |
1842 | * you ever go into kernel development or are already and you add this | |
1843 | * kind of flag potpourri even though you have read this comment shame | |
1844 | * on you. May the gods of operating system development have mercy on | |
1845 | * your soul because I won't. | |
1846 | */ | |
1847 | ret = waitid(P_PIDFD, pidfd, NULL, | |
1848 | /* Type of children to wait for. */ | |
1849 | __WALL | | |
1850 | /* How to wait for them. */ | |
1851 | WNOHANG | WNOWAIT | | |
1852 | /* What state to wait for. */ | |
1853 | WEXITED | WSTOPPED | WCONTINUED); | |
1854 | if (ret < 0) | |
1855 | return log_error_errno(false, errno, "Kernel does not support waiting on processes through pidfds"); | |
1856 | ||
8ad4fa68 CB |
1857 | ret = lxc_raw_pidfd_send_signal(pidfd, 0, NULL, 0); |
1858 | if (ret) | |
1859 | return log_error_errno(false, errno, "Kernel does not support sending singals through pidfds"); | |
1860 | ||
39293f22 CB |
1861 | return log_trace(true, "Kernel supports pidfds"); |
1862 | } | |
6aff5157 | 1863 | |
/*
 * Hand the standard streams over to @uid.
 *
 * Each of stdin, stdout and stderr whose st_rdev differs from that of
 * /dev/null gets its owner changed to @uid (group left as is) and its mode
 * set to 0700. Failures are logged and do not stop the remaining descriptors
 * from being processed.
 */
void fix_stdio_permissions(uid_t uid)
{
	int std_fds[3] = {STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO};
	int devnull_fd = -1;
	int ret;
	int i = 0;
	struct stat st;
	struct stat null_st;

	devnull_fd = open_devnull();
	if (devnull_fd < 0) {
		ERROR("Open /dev/null failed");
		goto out;
	}

	/* Without a valid null_st the comparison below would read
	 * uninitialized memory, so give up if /dev/null cannot be stat'ed.
	 */
	ret = fstat(devnull_fd, &null_st);
	if (ret != 0) {
		ERROR("Failed to get /dev/null stat");
		goto out;
	}

	for (; i < 3; i++) {
		ret = fstat(std_fds[i], &st);
		if (ret != 0) {
			ERROR("Failed to get fd %d stat", std_fds[i]);
			continue;
		}

		/* Leave descriptors that refer to /dev/null untouched. */
		if (st.st_rdev == null_st.st_rdev) {
			continue;
		}

		ret = fchown(std_fds[i], uid, st.st_gid);
		if (ret != 0) {
			ERROR("Failed to change fd %d owner", std_fds[i]);
		}

		ret = fchmod(std_fds[i], 0700);
		if (ret != 0) {
			ERROR("Failed to change fd %d mode", std_fds[i]);
		}
	}

out:
	if (devnull_fd >= 0) {
		close(devnull_fd);
	}
}