]>
Commit | Line | Data |
---|---|---|
cc73685d | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
e3642c43 | 2 | |
d38dd64a CB |
3 | #ifndef _GNU_SOURCE |
4 | #define _GNU_SOURCE 1 | |
5 | #endif | |
7935833c | 6 | #define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */ |
643c1984 | 7 | #include <ctype.h> |
a1e5280d | 8 | #include <dirent.h> |
e3642c43 | 9 | #include <errno.h> |
a1e5280d | 10 | #include <fcntl.h> |
dbaf55a3 | 11 | #include <grp.h> |
7935833c | 12 | #include <inttypes.h> |
a1e5280d | 13 | #include <libgen.h> |
b467714b | 14 | #include <pthread.h> |
39293f22 | 15 | #include <signal.h> |
d983b93c | 16 | #include <stddef.h> |
a1e5280d CB |
17 | #include <stdio.h> |
18 | #include <stdlib.h> | |
61a1d519 | 19 | #include <string.h> |
e3642c43 | 20 | #include <sys/mman.h> |
6e4bb2e0 | 21 | #include <sys/mount.h> |
066210f0 CB |
22 | /* Needs to be after sys/mount.h header */ |
23 | #include <linux/fs.h> | |
a1e5280d CB |
24 | #include <sys/param.h> |
25 | #include <sys/prctl.h> | |
26 | #include <sys/stat.h> | |
9be53773 SH |
27 | #include <sys/types.h> |
28 | #include <sys/wait.h> | |
d38dd64a | 29 | #include <unistd.h> |
e3642c43 | 30 | |
d38dd64a | 31 | #include "config.h" |
e3642c43 | 32 | #include "log.h" |
4fef78bc | 33 | #include "lsm/lsm.h" |
025ed0f3 | 34 | #include "lxclock.h" |
c4382ee2 | 35 | #include "memory_utils.h" |
51d0854c | 36 | #include "namespace.h" |
e3db0162 | 37 | #include "parse.h" |
f40988c7 | 38 | #include "process_utils.h" |
b25291da | 39 | #include "syscall_wrappers.h" |
981f6029 | 40 | #include "utils.h" |
e3642c43 | 41 | |
43f984ea DJ |
42 | #ifndef HAVE_STRLCPY |
43 | #include "include/strlcpy.h" | |
44 | #endif | |
45 | ||
bd583214 DJ |
46 | #ifndef HAVE_STRLCAT |
47 | #include "include/strlcat.h" | |
48 | #endif | |
49 | ||
4928c718 SG |
50 | #ifndef O_PATH |
51 | #define O_PATH 010000000 | |
52 | #endif | |
53 | ||
54 | #ifndef O_NOFOLLOW | |
55 | #define O_NOFOLLOW 00400000 | |
56 | #endif | |
57 | ||
ac2cecc4 | 58 | lxc_log_define(utils, lxc); |
e3642c43 | 59 | |
4295c5de SH |
60 | /* |
61 | * if path is btrfs, tries to remove it and any subvolumes beneath it | |
62 | */ | |
63 | extern bool btrfs_try_remove_subvol(const char *path); | |
64 | ||
41dc7155 | 65 | static int _recursive_rmdir(const char *dirname, dev_t pdev, |
0cc417b2 | 66 | const char *exclude, int level, bool onedev) |
60bf62d4 | 67 | { |
f1258455 CB |
68 | __do_closedir DIR *dir = NULL; |
69 | int failed = 0; | |
70 | bool hadexclude = false; | |
71 | int ret; | |
74f96976 | 72 | struct dirent *direntp; |
d726953a | 73 | char pathname[PATH_MAX]; |
60bf62d4 SH |
74 | |
75 | dir = opendir(dirname); | |
f1258455 CB |
76 | if (!dir) |
77 | return log_error(-1, "Failed to open \"%s\"", dirname); | |
60bf62d4 | 78 | |
74f96976 | 79 | while ((direntp = readdir(dir))) { |
60bf62d4 | 80 | int rc; |
f1258455 | 81 | struct stat mystat; |
60bf62d4 | 82 | |
60bf62d4 SH |
83 | if (!strcmp(direntp->d_name, ".") || |
84 | !strcmp(direntp->d_name, "..")) | |
85 | continue; | |
86 | ||
d726953a CB |
87 | rc = snprintf(pathname, PATH_MAX, "%s/%s", dirname, direntp->d_name); |
88 | if (rc < 0 || rc >= PATH_MAX) { | |
7be6bcd5 | 89 | ERROR("The name of path is too long"); |
f1258455 | 90 | failed = 1; |
60bf62d4 SH |
91 | continue; |
92 | } | |
18aa217b SH |
93 | |
94 | if (!level && exclude && !strcmp(direntp->d_name, exclude)) { | |
95 | ret = rmdir(pathname); | |
96 | if (ret < 0) { | |
f1258455 | 97 | switch (errno) { |
18aa217b | 98 | case ENOTEMPTY: |
7be6bcd5 | 99 | INFO("Not deleting snapshot \"%s\"", pathname); |
18aa217b SH |
100 | hadexclude = true; |
101 | break; | |
102 | case ENOTDIR: | |
103 | ret = unlink(pathname); | |
104 | if (ret) | |
7be6bcd5 | 105 | INFO("Failed to remove \"%s\"", pathname); |
18aa217b SH |
106 | break; |
107 | default: | |
7be6bcd5 | 108 | SYSERROR("Failed to rmdir \"%s\"", pathname); |
18aa217b SH |
109 | failed = 1; |
110 | break; | |
111 | } | |
112 | } | |
7be6bcd5 | 113 | |
18aa217b SH |
114 | continue; |
115 | } | |
116 | ||
60bf62d4 SH |
117 | ret = lstat(pathname, &mystat); |
118 | if (ret) { | |
7be6bcd5 | 119 | SYSERROR("Failed to stat \"%s\"", pathname); |
4295c5de | 120 | failed = 1; |
60bf62d4 SH |
121 | continue; |
122 | } | |
b14fc100 | 123 | |
4295c5de | 124 | if (onedev && mystat.st_dev != pdev) { |
4295c5de | 125 | if (btrfs_try_remove_subvol(pathname)) |
7be6bcd5 | 126 | INFO("Removed btrfs subvolume at \"%s\"", pathname); |
60bf62d4 | 127 | continue; |
4295c5de | 128 | } |
b14fc100 | 129 | |
60bf62d4 | 130 | if (S_ISDIR(mystat.st_mode)) { |
f1258455 CB |
131 | if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0) |
132 | failed = 1; | |
60bf62d4 | 133 | } else { |
066210f0 CB |
134 | ret = unlink(pathname); |
135 | if (ret < 0) { | |
136 | __do_close int fd = -EBADF; | |
137 | ||
138 | fd = open(pathname, O_RDONLY | O_CLOEXEC | O_NONBLOCK); | |
139 | if (fd >= 0) { | |
140 | /* The file might be marked immutable. */ | |
141 | int attr = 0; | |
142 | ret = ioctl(fd, FS_IOC_GETFLAGS, &attr); | |
143 | if (ret < 0) | |
144 | SYSERROR("Failed to retrieve file flags"); | |
145 | attr &= ~FS_IMMUTABLE_FL; | |
146 | ret = ioctl(fd, FS_IOC_SETFLAGS, &attr); | |
147 | if (ret < 0) | |
148 | SYSERROR("Failed to set file flags"); | |
149 | } | |
150 | ||
151 | ret = unlink(pathname); | |
152 | if (ret < 0) { | |
153 | SYSERROR("Failed to delete \"%s\"", pathname); | |
154 | failed = 1; | |
155 | } | |
60bf62d4 SH |
156 | } |
157 | } | |
158 | } | |
159 | ||
4295c5de | 160 | if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) { |
7be6bcd5 | 161 | SYSERROR("Failed to delete \"%s\"", dirname); |
f1258455 | 162 | failed = 1; |
60bf62d4 SH |
163 | } |
164 | ||
4355ab5f | 165 | return failed ? -1 : 0; |
60bf62d4 SH |
166 | } |
167 | ||
f1258455 CB |
168 | /* |
169 | * In overlayfs, st_dev is unreliable. So on overlayfs we don't do the | |
170 | * lxc_rmdir_onedev(). | |
0cc417b2 | 171 | */ |
f1258455 | 172 | static inline bool is_native_overlayfs(const char *path) |
0cc417b2 | 173 | { |
f1258455 CB |
174 | return has_fs_type(path, OVERLAY_SUPER_MAGIC) || |
175 | has_fs_type(path, OVERLAYFS_SUPER_MAGIC); | |
0cc417b2 SH |
176 | } |
177 | ||
/*
 * Recursively remove @path. An entry named @exclude directly below @path is
 * handled specially by _recursive_rmdir(). Unless @path is on overlayfs the
 * removal stays on the device @path itself lives on.
 *
 * Returns 0 on success (a missing @path counts as success), -1 if there were
 * any failures.
 */
extern int lxc_rmdir_onedev(const char *path, const char *exclude)
{
	struct stat st;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &st) >= 0)
		return _recursive_rmdir(path, st.st_dev, exclude, 0, onedev);

	/* Nothing to delete. */
	if (errno == ENOENT)
		return 0;

	return log_error_errno(-1, errno, "Failed to stat \"%s\"", path);
}
196 | ||
/*
 * Parse @arg as an unsigned 16-bit integer in the given @base and store it in
 * @val. Borrowed from iproute2.
 *
 * Returns 0 on success. A NULL or empty @arg yields ret_errno(EINVAL);
 * trailing garbage, no digits, a value above 0xFFFF or any strtoul() error
 * yields ret_errno(ERANGE). @val is only written on success.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	unsigned long res;
	/*
	 * Start out as NULL: strtoul() may reject @base without storing the
	 * end pointer, and we must not inspect an indeterminate value.
	 */
	char *ptr = NULL;

	if (!arg || !*arg)
		return ret_errno(EINVAL);

	errno = 0;
	res = strtoul(arg, &ptr, base);
	/* Check errno first so a failed conversion never dereferences ptr. */
	if (errno != 0 || !ptr || ptr == arg || *ptr || res > 0xFFFF)
		return ret_errno(ERANGE);

	*val = res;

	return 0;
}
215 | ||
6099dd5a | 216 | int mkdir_p(const char *dir, mode_t mode) |
1b09f2c0 | 217 | { |
3ce74686 SH |
218 | const char *tmp = dir; |
219 | const char *orig = dir; | |
7be6bcd5 | 220 | |
c5e7a7ac | 221 | do { |
f1258455 | 222 | __do_free char *makeme = NULL; |
6099dd5a | 223 | int ret; |
6099dd5a | 224 | |
860fc865 RW |
225 | dir = tmp + strspn(tmp, "/"); |
226 | tmp = dir + strcspn(dir, "/"); | |
b14fc100 | 227 | |
d74325c4 | 228 | makeme = strndup(orig, dir - orig); |
6099dd5a | 229 | if (!makeme) |
f1258455 | 230 | return ret_set_errno(-1, ENOMEM); |
6099dd5a CB |
231 | |
232 | ret = mkdir(makeme, mode); | |
f1258455 CB |
233 | if (ret < 0 && errno != EEXIST) |
234 | return log_error_errno(-1, errno, "Failed to create directory \"%s\"", makeme); | |
6099dd5a CB |
235 | |
236 | } while (tmp != dir); | |
1b09f2c0 | 237 | |
98663823 | 238 | return 0; |
1b09f2c0 | 239 | } |
2a59a681 | 240 | |
39b72573 | 241 | char *get_rundir(void) |
9e60f51d | 242 | { |
8b961418 CB |
243 | __do_free char *rundir = NULL; |
244 | char *static_rundir; | |
245 | int ret; | |
f1258455 | 246 | size_t len; |
97a696c6 | 247 | const char *homedir; |
9650c735 | 248 | struct stat sb; |
9e60f51d | 249 | |
b14fc100 | 250 | if (stat(RUNTIME_PATH, &sb) < 0) |
9650c735 | 251 | return NULL; |
9650c735 | 252 | |
f1258455 CB |
253 | if (geteuid() == sb.st_uid || getegid() == sb.st_gid) |
254 | return strdup(RUNTIME_PATH); | |
97a696c6 | 255 | |
8b961418 CB |
256 | static_rundir = getenv("XDG_RUNTIME_DIR"); |
257 | if (static_rundir) | |
258 | return strdup(static_rundir); | |
97a696c6 | 259 | |
7be6bcd5 | 260 | INFO("XDG_RUNTIME_DIR isn't set in the environment"); |
44b9ae4b | 261 | homedir = getenv("HOME"); |
f1258455 CB |
262 | if (!homedir) |
263 | return log_error(NULL, "HOME isn't set in the environment"); | |
97a696c6 | 264 | |
f1258455 CB |
265 | len = strlen(homedir) + 17; |
266 | rundir = malloc(sizeof(char) * len); | |
b14fc100 | 267 | if (!rundir) |
268 | return NULL; | |
269 | ||
8b961418 CB |
270 | ret = snprintf(rundir, len, "%s/.cache/lxc/run/", homedir); |
271 | if (ret < 0 || (size_t)ret >= len) | |
272 | return ret_set_errno(NULL, EIO); | |
273 | ||
274 | return move_ptr(rundir); | |
9e60f51d DE |
275 | } |
276 | ||
/*
 * Block until child @pid has been reaped, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns 0 if the child exited normally with status 0, -1 if waitpid()
 * failed or the child terminated in any other way.
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
c797a220 | 298 | |
39293f22 CB |
299 | int wait_for_pidfd(int pidfd) |
300 | { | |
301 | int ret; | |
302 | siginfo_t info = { | |
303 | .si_signo = 0, | |
304 | }; | |
305 | ||
306 | do { | |
307 | ret = waitid(P_PIDFD, pidfd, &info, __WALL | WEXITED); | |
308 | } while (ret < 0 && errno == EINTR); | |
309 | ||
310 | return !ret && WIFEXITED(info.si_status) && WEXITSTATUS(info.si_status) == 0; | |
311 | } | |
312 | ||
/*
 * Block until child @pid has been reaped, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns the raw wait status as filled in by waitpid() (to be decoded with
 * the W*() macros), or -1 if waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == pid)
			return status;

		/* Any failure other than an interrupted call is final. */
		if (reaped == -1 && errno != EINTR)
			return -1;
	}
}
92f023dc | 331 | |
fa2bb6ba SH |
332 | #ifdef HAVE_OPENSSL |
333 | #include <openssl/evp.h> | |
41246cee | 334 | |
f1258455 CB |
335 | static int do_sha1_hash(const char *buf, int buflen, unsigned char *md_value, |
336 | unsigned int *md_len) | |
41246cee | 337 | { |
fa2bb6ba SH |
338 | EVP_MD_CTX *mdctx; |
339 | const EVP_MD *md; | |
340 | ||
341 | md = EVP_get_digestbyname("sha1"); | |
f1258455 CB |
342 | if (!md) |
343 | return log_error(-1, "Unknown message digest: sha1\n"); | |
fa2bb6ba | 344 | |
b138bfcf | 345 | mdctx = EVP_MD_CTX_create(); |
fa2bb6ba SH |
346 | EVP_DigestInit_ex(mdctx, md, NULL); |
347 | EVP_DigestUpdate(mdctx, buf, buflen); | |
348 | EVP_DigestFinal_ex(mdctx, md_value, md_len); | |
b138bfcf | 349 | EVP_MD_CTX_destroy(mdctx); |
fa2bb6ba SH |
350 | |
351 | return 0; | |
41246cee DE |
352 | } |
353 | ||
7c3d3976 | 354 | int sha1sum_file(char *fnam, unsigned char *digest, unsigned int *md_len) |
3ce74686 | 355 | { |
f1258455 CB |
356 | __do_free char *buf = NULL; |
357 | __do_fclose FILE *f = NULL; | |
3ce74686 | 358 | int ret; |
3ce74686 SH |
359 | long flen; |
360 | ||
361 | if (!fnam) | |
362 | return -1; | |
b14fc100 | 363 | |
025ed0f3 | 364 | f = fopen_cloexec(fnam, "r"); |
f1258455 CB |
365 | if (!f) |
366 | return log_error_errno(-1, errno, "Failed to open template \"%s\"", fnam); | |
b14fc100 | 367 | |
f1258455 CB |
368 | if (fseek(f, 0, SEEK_END) < 0) |
369 | return log_error_errno(-1, errno, "Failed to seek to end of template"); | |
b14fc100 | 370 | |
f1258455 CB |
371 | flen = ftell(f); |
372 | if (flen < 0) | |
373 | return log_error_errno(-1, errno, "Failed to tell size of template"); | |
b14fc100 | 374 | |
f1258455 CB |
375 | if (fseek(f, 0, SEEK_SET) < 0) |
376 | return log_error_errno(-1, errno, "Failed to seek to start of template"); | |
b14fc100 | 377 | |
f1258455 CB |
378 | buf = malloc(flen + 1); |
379 | if (!buf) | |
380 | return log_error_errno(-1, ENOMEM, "Out of memory"); | |
b14fc100 | 381 | |
f1258455 CB |
382 | if (fread(buf, 1, flen, f) != flen) |
383 | return log_error_errno(-1, errno, "Failed to read template"); | |
b14fc100 | 384 | |
3ce74686 | 385 | buf[flen] = '\0'; |
fa2bb6ba | 386 | ret = do_sha1_hash(buf, flen, (void *)digest, md_len); |
3ce74686 SH |
387 | return ret; |
388 | } | |
389 | #endif | |
61a1d519 | 390 | |
8bd8018e | 391 | struct lxc_popen_FILE *lxc_popen(const char *command) |
ebec9176 | 392 | { |
3f323207 | 393 | int ret; |
ebec9176 AM |
394 | int pipe_fds[2]; |
395 | pid_t child_pid; | |
8bd8018e | 396 | struct lxc_popen_FILE *fp = NULL; |
ebec9176 | 397 | |
8bd8018e CB |
398 | ret = pipe2(pipe_fds, O_CLOEXEC); |
399 | if (ret < 0) | |
ebec9176 | 400 | return NULL; |
ebec9176 AM |
401 | |
402 | child_pid = fork(); | |
8bd8018e CB |
403 | if (child_pid < 0) |
404 | goto on_error; | |
405 | ||
406 | if (!child_pid) { | |
407 | sigset_t mask; | |
408 | ||
409 | close(pipe_fds[0]); | |
410 | ||
411 | /* duplicate stdout */ | |
412 | if (pipe_fds[1] != STDOUT_FILENO) | |
413 | ret = dup2(pipe_fds[1], STDOUT_FILENO); | |
414 | else | |
415 | ret = fcntl(pipe_fds[1], F_SETFD, 0); | |
416 | if (ret < 0) { | |
417 | close(pipe_fds[1]); | |
03f618af | 418 | _exit(EXIT_FAILURE); |
3f323207 CB |
419 | } |
420 | ||
8bd8018e CB |
421 | /* duplicate stderr */ |
422 | if (pipe_fds[1] != STDERR_FILENO) | |
423 | ret = dup2(pipe_fds[1], STDERR_FILENO); | |
424 | else | |
425 | ret = fcntl(pipe_fds[1], F_SETFD, 0); | |
426 | close(pipe_fds[1]); | |
427 | if (ret < 0) | |
03f618af | 428 | _exit(EXIT_FAILURE); |
8bd8018e CB |
429 | |
430 | /* unblock all signals */ | |
431 | ret = sigfillset(&mask); | |
432 | if (ret < 0) | |
03f618af | 433 | _exit(EXIT_FAILURE); |
8bd8018e | 434 | |
b467714b | 435 | ret = pthread_sigmask(SIG_UNBLOCK, &mask, NULL); |
8bd8018e | 436 | if (ret < 0) |
03f618af | 437 | _exit(EXIT_FAILURE); |
8bd8018e | 438 | |
ecfa5693 | 439 | /* check if /bin/sh exist, otherwise try Android location /system/bin/sh */ |
440 | if (file_exists("/bin/sh")) | |
441 | execl("/bin/sh", "sh", "-c", command, (char *)NULL); | |
442 | else | |
443 | execl("/system/bin/sh", "sh", "-c", command, (char *)NULL); | |
444 | ||
03f618af | 445 | _exit(127); |
ebec9176 AM |
446 | } |
447 | ||
8bd8018e CB |
448 | close(pipe_fds[1]); |
449 | pipe_fds[1] = -1; | |
ebec9176 | 450 | |
8bd8018e CB |
451 | fp = malloc(sizeof(*fp)); |
452 | if (!fp) | |
453 | goto on_error; | |
b14fc100 | 454 | |
7e50ec0b | 455 | memset(fp, 0, sizeof(*fp)); |
ebec9176 AM |
456 | |
457 | fp->child_pid = child_pid; | |
8bd8018e | 458 | fp->pipe = pipe_fds[0]; |
ebec9176 | 459 | |
7e50ec0b CB |
460 | /* From now on, closing fp->f will also close fp->pipe. So only ever |
461 | * call fclose(fp->f). | |
462 | */ | |
8bd8018e CB |
463 | fp->f = fdopen(pipe_fds[0], "r"); |
464 | if (!fp->f) | |
465 | goto on_error; | |
ebec9176 | 466 | |
8bd8018e | 467 | return fp; |
ebec9176 | 468 | |
8bd8018e | 469 | on_error: |
7e50ec0b CB |
470 | /* We can only close pipe_fds[0] if fdopen() didn't succeed or wasn't |
471 | * called yet. Otherwise the fd belongs to the file opened by fdopen() | |
472 | * since it isn't dup()ed. | |
473 | */ | |
474 | if (fp && !fp->f && pipe_fds[0] >= 0) | |
8bd8018e CB |
475 | close(pipe_fds[0]); |
476 | ||
477 | if (pipe_fds[1] >= 0) | |
478 | close(pipe_fds[1]); | |
ebec9176 | 479 | |
7e50ec0b CB |
480 | if (fp && fp->f) |
481 | fclose(fp->f); | |
482 | ||
483 | if (fp) | |
484 | free(fp); | |
485 | ||
ebec9176 AM |
486 | return NULL; |
487 | } | |
488 | ||
8bd8018e | 489 | int lxc_pclose(struct lxc_popen_FILE *fp) |
ebec9176 | 490 | { |
ebec9176 | 491 | pid_t wait_pid; |
8bd8018e | 492 | int wstatus = 0; |
ebec9176 | 493 | |
8bd8018e | 494 | if (!fp) |
ebec9176 | 495 | return -1; |
ebec9176 AM |
496 | |
497 | do { | |
8bd8018e CB |
498 | wait_pid = waitpid(fp->child_pid, &wstatus, 0); |
499 | } while (wait_pid < 0 && errno == EINTR); | |
ebec9176 | 500 | |
8bd8018e CB |
501 | fclose(fp->f); |
502 | free(fp); | |
503 | ||
504 | if (wait_pid < 0) | |
ebec9176 | 505 | return -1; |
ebec9176 AM |
506 | |
507 | return wstatus; | |
508 | } | |
509 | ||
508c263e SH |
510 | int randseed(bool srand_it) |
511 | { | |
4110345b | 512 | __do_fclose FILE *f = NULL; |
508c263e | 513 | /* |
7be6bcd5 | 514 | * srand pre-seed function based on /dev/urandom |
515 | */ | |
091045f8 | 516 | unsigned int seed = time(NULL) + getpid(); |
508c263e | 517 | |
4110345b | 518 | f = fopen("/dev/urandom", "re"); |
508c263e SH |
519 | if (f) { |
520 | int ret = fread(&seed, sizeof(seed), 1, f); | |
521 | if (ret != 1) | |
7be6bcd5 | 522 | SYSDEBUG("Unable to fread /dev/urandom, fallback to time+pid rand seed"); |
508c263e SH |
523 | } |
524 | ||
525 | if (srand_it) | |
526 | srand(seed); | |
527 | ||
528 | return seed; | |
529 | } | |
5d897655 SH |
530 | |
531 | uid_t get_ns_uid(uid_t orig) | |
532 | { | |
4110345b CB |
533 | __do_free char *line = NULL; |
534 | __do_fclose FILE *f = NULL; | |
5d897655 SH |
535 | size_t sz = 0; |
536 | uid_t nsid, hostid, range; | |
7be6bcd5 | 537 | |
4110345b | 538 | f = fopen("/proc/self/uid_map", "re"); |
f1258455 CB |
539 | if (!f) |
540 | return log_error_errno(0, errno, "Failed to open uid_map"); | |
5d897655 SH |
541 | |
542 | while (getline(&line, &sz, f) != -1) { | |
543 | if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3) | |
544 | continue; | |
b14fc100 | 545 | |
4110345b CB |
546 | if (hostid <= orig && hostid + range > orig) |
547 | return nsid += orig - hostid; | |
5d897655 SH |
548 | } |
549 | ||
4110345b | 550 | return LXC_INVALID_UID; |
b962868f CB |
551 | } |
552 | ||
553 | gid_t get_ns_gid(gid_t orig) | |
554 | { | |
4110345b CB |
555 | __do_free char *line = NULL; |
556 | __do_fclose FILE *f = NULL; | |
b962868f CB |
557 | size_t sz = 0; |
558 | gid_t nsid, hostid, range; | |
7be6bcd5 | 559 | |
4110345b | 560 | f = fopen("/proc/self/gid_map", "re"); |
f1258455 CB |
561 | if (!f) |
562 | return log_error_errno(0, errno, "Failed to open gid_map"); | |
b962868f CB |
563 | |
564 | while (getline(&line, &sz, f) != -1) { | |
565 | if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3) | |
566 | continue; | |
567 | ||
4110345b CB |
568 | if (hostid <= orig && hostid + range > orig) |
569 | return nsid += orig - hostid; | |
b962868f CB |
570 | } |
571 | ||
4110345b | 572 | return LXC_INVALID_GID; |
5d897655 | 573 | } |
c476bdce SH |
574 | |
/*
 * Convenience wrapper: forwards @path to exists_dir_at() with -1 as the
 * directory file descriptor argument and returns its result.
 * NOTE(review): presumably -1 means "no base directory" - confirm against
 * exists_dir_at().
 */
bool dir_exists(const char *path)
{
	bool found = exists_dir_at(-1, path);

	return found;
}
93c379f0 ÇO |
579 | |
/*
 * 64-bit FNV-1a hash over the @len bytes at @buf, starting from @hval (pass
 * the result of a previous call to continue hashing, or the FNV offset basis
 * to start a new hash).
 *
 * SHA-1 is deliberately not used here so that this does not depend on an
 * optional crypto library. FNV has good anti-collision properties and neither
 * pre-image resistance nor one-way-ness matters: the goal is merely to make a
 * name unique in the 108 bytes of space available.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *octets = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)octets[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
2c6f3fc9 | 602 | |
f6310f18 | 603 | bool is_shared_mountpoint(const char *path) |
2c6f3fc9 | 604 | { |
c4382ee2 CB |
605 | __do_fclose FILE *f = NULL; |
606 | __do_free char *line = NULL; | |
2c6f3fc9 | 607 | int i; |
c4382ee2 | 608 | size_t len = 0; |
2c6f3fc9 | 609 | |
4110345b | 610 | f = fopen("/proc/self/mountinfo", "re"); |
2c6f3fc9 SH |
611 | if (!f) |
612 | return 0; | |
b14fc100 | 613 | |
c4382ee2 CB |
614 | while (getline(&line, &len, f) > 0) { |
615 | char *slider1, *slider2; | |
616 | ||
617 | for (slider1 = line, i = 0; slider1 && i < 4; i++) | |
618 | slider1 = strchr(slider1 + 1, ' '); | |
619 | ||
620 | if (!slider1) | |
2c6f3fc9 | 621 | continue; |
b14fc100 | 622 | |
c4382ee2 CB |
623 | slider2 = strchr(slider1 + 1, ' '); |
624 | if (!slider2) | |
2c6f3fc9 | 625 | continue; |
b14fc100 | 626 | |
c4382ee2 CB |
627 | *slider2 = '\0'; |
628 | if (strcmp(slider1 + 1, path) == 0) { | |
f6310f18 | 629 | /* This is the path. Is it shared? */ |
c4382ee2 CB |
630 | slider1 = strchr(slider2 + 1, ' '); |
631 | if (slider1 && strstr(slider1, "shared:")) | |
f6310f18 | 632 | return true; |
2c6f3fc9 SH |
633 | } |
634 | } | |
b14fc100 | 635 | |
f6310f18 LT |
636 | return false; |
637 | } | |
638 | ||
/*
 * Detect whether / is mounted MS_SHARED, which is determined by looking at
 * /proc/self/mountinfo.
 *
 * Only "/" is checked. A container rootfs or mount location that is
 * MS_SHARED but is not "/" is not detected; working that out was judged too
 * much effort to be worth it.
 *
 * Returns 1 if "/" is shared, 0 otherwise.
 */
int detect_shared_rootfs(void)
{
	return is_shared_mountpoint("/") ? 1 : 0;
}
0e6e3a41 | 653 | |
37ef15bb CB |
654 | bool switch_to_ns(pid_t pid, const char *ns) |
655 | { | |
f62cf1d4 | 656 | __do_close int fd = -EBADF; |
b280bc38 CB |
657 | int ret; |
658 | char nspath[STRLITERALLEN("/proc//ns/") | |
659 | + INTTYPE_TO_STRLEN(pid_t) | |
660 | + LXC_NAMESPACE_NAME_MAX]; | |
51d0854c DY |
661 | |
662 | /* Switch to new ns */ | |
b280bc38 CB |
663 | ret = snprintf(nspath, sizeof(nspath), "/proc/%d/ns/%s", pid, ns); |
664 | if (ret < 0 || ret >= sizeof(nspath)) | |
51d0854c DY |
665 | return false; |
666 | ||
b280bc38 | 667 | fd = open(nspath, O_RDONLY | O_CLOEXEC); |
f1258455 CB |
668 | if (fd < 0) |
669 | return log_error_errno(false, errno, "Failed to open \"%s\"", nspath); | |
51d0854c DY |
670 | |
671 | ret = setns(fd, 0); | |
f1258455 CB |
672 | if (ret) |
673 | return log_error_errno(false, errno, "Failed to set process %d to \"%s\" of %d", pid, ns, fd); | |
b14fc100 | 674 | |
51d0854c DY |
675 | return true; |
676 | } | |
677 | ||
b7f954bb SH |
678 | /* |
679 | * looking at fs/proc_namespace.c, it appears we can | |
680 | * actually expect the rootfs entry to very specifically contain | |
681 | * " - rootfs rootfs " | |
682 | * IIUC, so long as we've chrooted so that rootfs is not our root, | |
683 | * the rootfs entry should always be skipped in mountinfo contents. | |
684 | */ | |
fa454c8e | 685 | bool detect_ramfs_rootfs(void) |
b7f954bb | 686 | { |
4110345b CB |
687 | __do_free char *line = NULL; |
688 | __do_free void *fopen_cache = NULL; | |
689 | __do_fclose FILE *f = NULL; | |
fa454c8e | 690 | size_t len = 0; |
b7f954bb | 691 | |
4110345b CB |
692 | f = fopen_cached("/proc/self/mountinfo", "re", &fopen_cache); |
693 | if (!f) | |
fa454c8e CB |
694 | return false; |
695 | ||
696 | while (getline(&line, &len, f) != -1) { | |
4110345b CB |
697 | int i; |
698 | char *p, *p2; | |
699 | ||
fa454c8e CB |
700 | for (p = line, i = 0; p && i < 4; i++) |
701 | p = strchr(p + 1, ' '); | |
b7f954bb SH |
702 | if (!p) |
703 | continue; | |
b14fc100 | 704 | |
fa454c8e | 705 | p2 = strchr(p + 1, ' '); |
b7f954bb SH |
706 | if (!p2) |
707 | continue; | |
708 | *p2 = '\0'; | |
fa454c8e | 709 | if (strcmp(p + 1, "/") == 0) { |
1a0e70ac | 710 | /* This is '/'. Is it the ramfs? */ |
fa454c8e | 711 | p = strchr(p2 + 1, '-'); |
97edebfa | 712 | if (p && strncmp(p, "- rootfs ", 9) == 0) |
fa454c8e | 713 | return true; |
b7f954bb SH |
714 | } |
715 | } | |
b14fc100 | 716 | |
fa454c8e | 717 | return false; |
b7f954bb SH |
718 | } |
719 | ||
37ef15bb CB |
720 | char *on_path(const char *cmd, const char *rootfs) |
721 | { | |
f1258455 CB |
722 | __do_free char *path = NULL; |
723 | char *entry = NULL; | |
d726953a | 724 | char cmdpath[PATH_MAX]; |
0e6e3a41 SG |
725 | int ret; |
726 | ||
727 | path = getenv("PATH"); | |
728 | if (!path) | |
8afb3e61 | 729 | return NULL; |
0e6e3a41 SG |
730 | |
731 | path = strdup(path); | |
732 | if (!path) | |
8afb3e61 | 733 | return NULL; |
0e6e3a41 | 734 | |
f1258455 | 735 | lxc_iterate_parts(entry, path, ":") { |
9d9c111c | 736 | if (rootfs) |
d726953a | 737 | ret = snprintf(cmdpath, PATH_MAX, "%s/%s/%s", rootfs, |
37ef15bb | 738 | entry, cmd); |
9d9c111c | 739 | else |
d726953a CB |
740 | ret = snprintf(cmdpath, PATH_MAX, "%s/%s", entry, cmd); |
741 | if (ret < 0 || ret >= PATH_MAX) | |
84c5549b | 742 | continue; |
0e6e3a41 | 743 | |
f1258455 | 744 | if (access(cmdpath, X_OK) == 0) |
8afb3e61 | 745 | return strdup(cmdpath); |
0e6e3a41 SG |
746 | } |
747 | ||
8afb3e61 | 748 | return NULL; |
0e6e3a41 | 749 | } |
76a26f55 | 750 | |
/*
 * Report whether /proc/self/ns/cgroup exists, which this file takes as the
 * indication that cgroup namespaces are supported.
 */
bool cgns_supported(void)
{
	static const char cgroup_ns_path[] = "/proc/self/ns/cgroup";

	return file_exists(cgroup_ns_path);
}
755 | ||
9d9c111c SH |
756 | /* historically lxc-init has been under /usr/lib/lxc and under |
757 | * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc. | |
758 | */ | |
759 | char *choose_init(const char *rootfs) | |
760 | { | |
761 | char *retv = NULL; | |
370ec268 SF |
762 | const char *empty = "", |
763 | *tmp; | |
9d9c111c | 764 | int ret, env_set = 0; |
9d9c111c SH |
765 | |
766 | if (!getenv("PATH")) { | |
767 | if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0)) | |
768 | SYSERROR("Failed to setenv"); | |
b14fc100 | 769 | |
9d9c111c SH |
770 | env_set = 1; |
771 | } | |
772 | ||
773 | retv = on_path("init.lxc", rootfs); | |
774 | ||
7be6bcd5 | 775 | if (env_set) |
9d9c111c SH |
776 | if (unsetenv("PATH")) |
777 | SYSERROR("Failed to unsetenv"); | |
9d9c111c SH |
778 | |
779 | if (retv) | |
780 | return retv; | |
781 | ||
782 | retv = malloc(PATH_MAX); | |
783 | if (!retv) | |
784 | return NULL; | |
785 | ||
786 | if (rootfs) | |
370ec268 | 787 | tmp = rootfs; |
9d9c111c | 788 | else |
370ec268 SF |
789 | tmp = empty; |
790 | ||
791 | ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc"); | |
9d9c111c | 792 | if (ret < 0 || ret >= PATH_MAX) { |
7be6bcd5 | 793 | ERROR("The name of path is too long"); |
9d9c111c SH |
794 | goto out1; |
795 | } | |
b14fc100 | 796 | |
e57cd7e9 | 797 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
798 | return retv; |
799 | ||
370ec268 | 800 | ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init"); |
9d9c111c | 801 | if (ret < 0 || ret >= PATH_MAX) { |
7be6bcd5 | 802 | ERROR("The name of path is too long"); |
9d9c111c SH |
803 | goto out1; |
804 | } | |
b14fc100 | 805 | |
e57cd7e9 | 806 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
807 | return retv; |
808 | ||
370ec268 | 809 | ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp); |
9d9c111c | 810 | if (ret < 0 || ret >= PATH_MAX) { |
7be6bcd5 | 811 | ERROR("The name of path is too long"); |
9d9c111c SH |
812 | goto out1; |
813 | } | |
b14fc100 | 814 | |
e57cd7e9 | 815 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
816 | return retv; |
817 | ||
370ec268 | 818 | ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp); |
9d9c111c | 819 | if (ret < 0 || ret >= PATH_MAX) { |
7be6bcd5 | 820 | ERROR("The name of path is too long"); |
9d9c111c SH |
821 | goto out1; |
822 | } | |
b14fc100 | 823 | |
e57cd7e9 | 824 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
825 | return retv; |
826 | ||
827 | /* | |
828 | * Last resort, look for the statically compiled init.lxc which we | |
829 | * hopefully bind-mounted in. | |
830 | * If we are called during container setup, and we get to this point, | |
831 | * then the init.lxc.static from the host will need to be bind-mounted | |
832 | * in. So we return NULL here to indicate that. | |
833 | */ | |
834 | if (rootfs) | |
835 | goto out1; | |
836 | ||
837 | ret = snprintf(retv, PATH_MAX, "/init.lxc.static"); | |
838 | if (ret < 0 || ret >= PATH_MAX) { | |
839 | WARN("Nonsense - name /lxc.init.static too long"); | |
840 | goto out1; | |
841 | } | |
b14fc100 | 842 | |
e57cd7e9 | 843 | if (access(retv, X_OK) == 0) |
9d9c111c SH |
844 | return retv; |
845 | ||
846 | out1: | |
847 | free(retv); | |
848 | return NULL; | |
849 | } | |
735f2c6e | 850 | |
6010a416 SG |
851 | /* |
852 | * Given the '-t' template option to lxc-create, figure out what to | |
853 | * do. If the template is a full executable path, use that. If it | |
854 | * is something like 'sshd', then return $templatepath/lxc-sshd. | |
855 | * On success return the template, on error return NULL. | |
856 | */ | |
857 | char *get_template_path(const char *t) | |
858 | { | |
859 | int ret, len; | |
860 | char *tpath; | |
861 | ||
b275efe3 RK |
862 | if (t[0] == '/') { |
863 | if (access(t, X_OK) == 0) { | |
864 | return strdup(t); | |
865 | } else { | |
866 | SYSERROR("Bad template pathname: %s", t); | |
867 | return NULL; | |
868 | } | |
6010a416 SG |
869 | } |
870 | ||
871 | len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1; | |
b14fc100 | 872 | |
6010a416 SG |
873 | tpath = malloc(len); |
874 | if (!tpath) | |
875 | return NULL; | |
b14fc100 | 876 | |
6010a416 SG |
877 | ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t); |
878 | if (ret < 0 || ret >= len) { | |
879 | free(tpath); | |
880 | return NULL; | |
881 | } | |
b14fc100 | 882 | |
6010a416 SG |
883 | if (access(tpath, X_OK) < 0) { |
884 | SYSERROR("bad template: %s", t); | |
885 | free(tpath); | |
886 | return NULL; | |
887 | } | |
888 | ||
889 | return tpath; | |
890 | } | |
0a4be28d | 891 | |
/*
 * Step to the next segment of a path whose '/' separators have been replaced
 * with '\0'.
 *
 * @path:    the pathname with / replaced by '\0'.
 * @offsetp: offset of the segment that was last seen; updated on return to
 *           the offset of the next segment (or to @fulllen if there is none).
 *           Left untouched if it is already at or past @fulllen.
 * @fulllen: full original path length.
 *
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int pos = *offsetp;

	if (pos >= fulllen)
		return NULL;

	/* Run to the end of the current segment ... */
	for (; pos < fulllen && path[pos] != '\0'; pos++)
		;

	/* ... and across the separator(s) that follow it. */
	for (; pos < fulllen && path[pos] == '\0'; pos++)
		;

	*offsetp = pos;

	if (pos >= fulllen)
		return NULL;

	return path + pos;
}
916 | ||
/*
 * Check that @subdir is a subdir of @dir, i.e. that @subdir starts with @dir
 * and the match ends on a path component boundary. @len is the length of @dir
 * (to avoid having to recalculate it).
 *
 * A path counts as a subdir of itself. An empty @dir (@len == 0) is treated
 * as a prefix of the empty string and of any path starting with '/'.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	if (subdirlen < len)
		return false;

	if (strncmp(subdir, dir, len) != 0)
		return false;

	/*
	 * @dir ends in a separator, so the prefix match already ended on a
	 * component boundary. Guard against len == 0 so that dir[len - 1]
	 * never reads in front of the string.
	 */
	if (len > 0 && dir[len - 1] == '/')
		return true;

	/* subdirlen >= len, so subdir[len] is at worst the NUL terminator. */
	if (subdir[len] == '/' || subdirlen == len)
		return true;

	return false;
}
939 | ||
/*
 * Tell whether an open file descriptor refers to a symbolic link.
 *
 * Returns 0 when @fd is not a symlink, -ELOOP when it is, and -ENOENT when
 * fstat() on @fd fails (whatever the underlying errno was).
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
958 | ||
959 | /* | |
960 | * Open a file or directory, provided that it contains no symlinks. | |
961 | * | |
962 | * CAVEAT: This function must not be used for other purposes than container | |
963 | * setup before executing the container's init | |
964 | */ | |
965 | static int open_if_safe(int dirfd, const char *nextpath) | |
966 | { | |
967 | int newfd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW); | |
1a0e70ac | 968 | if (newfd >= 0) /* Was not a symlink, all good. */ |
592fd47a SH |
969 | return newfd; |
970 | ||
971 | if (errno == ELOOP) | |
972 | return newfd; | |
973 | ||
974 | if (errno == EPERM || errno == EACCES) { | |
1a0e70ac CB |
975 | /* We're not root (cause we got EPERM) so try opening with |
976 | * O_PATH. | |
977 | */ | |
592fd47a SH |
978 | newfd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW); |
979 | if (newfd >= 0) { | |
1a0e70ac CB |
980 | /* O_PATH will return an fd for symlinks. We know |
981 | * nextpath wasn't a symlink at last openat, so if fd is | |
982 | * now a link, then something * fishy is going on. | |
592fd47a SH |
983 | */ |
984 | int ret = check_symlink(newfd); | |
985 | if (ret < 0) { | |
986 | close(newfd); | |
987 | newfd = ret; | |
988 | } | |
989 | } | |
990 | } | |
991 | ||
992 | return newfd; | |
993 | } | |
994 | ||
/*
 * Open a path intended as a mount source or target, walking it one
 * component at a time so that no component below @prefix_skip is a symlink.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target:      path to be opened
 * @prefix_skip: leading part of @target in which symbolic links are not
 *               checked (this would be the container's rootfs). NULL or ""
 *               means the walk starts at "/".
 *
 * Returns an open fd for the path, or < 0 on error: -EINVAL when @target
 * does not lie below @prefix_skip, -ENOMEM when the working copy cannot be
 * allocated, otherwise the negative value produced by open() or
 * open_if_safe().
 *
 * NOTE(review): with no prefix and a @target that does not start with '/',
 * the first get_nextpath() call starts inside the leading component and
 * therefore appears to skip it - confirm that this is intended.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int curlen = 0, dirfd, fulllen, i;
	char *copy, *nextpath;

	fulllen = strlen(target);

	if (!prefix_skip || prefix_skip[0] == '\0') {
		prefix_skip = "/";
		curlen = 0;
	} else {
		/* make sure prefix-skip makes sense */
		curlen = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, curlen)) {
			ERROR("WHOA there - target \"%s\" didn't start with prefix \"%s\"",
			      target, prefix_skip);
			return -EINVAL;
		}

		/*
		 * get_nextpath() wants its offset on, or before, a separator
		 * that was turned into '\0'. The prefix is non-empty here, so
		 * stepping back by one guarantees that.
		 */
		curlen--;
	}

	/* Work on a private copy that can be chopped into components. */
	copy = strdup(target);
	if (!copy) {
		ERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}

	for (i = 0; i < fulllen; i++)
		if (copy[i] == '/')
			copy[i] = '\0';

	dirfd = open(prefix_skip, O_RDONLY);
	if (dirfd < 0) {
		SYSERROR("Failed to open path \"%s\"", prefix_skip);
		free(copy);
		return dirfd;
	}

	/* Descend one component at a time, always dropping the parent fd. */
	while ((nextpath = get_nextpath(copy, &curlen, fulllen)) != NULL) {
		int newfd, saved_errno;

		newfd = open_if_safe(dirfd, nextpath);
		saved_errno = errno;
		close(dirfd);

		dirfd = newfd;
		if (newfd < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", nextpath, target);

			break;
		}
	}

	free(copy);
	return dirfd;
}
1078 | ||
43535b6d CB |
1079 | int __safe_mount_beneath_at(int beneath_fd, const char *src, const char *dst, const char *fstype, |
1080 | unsigned int flags, const void *data) | |
65f0afde | 1081 | { |
43535b6d | 1082 | __do_close int source_fd = -EBADF, target_fd = -EBADF; |
65f0afde CB |
1083 | struct lxc_open_how how = { |
1084 | .flags = O_RDONLY | O_CLOEXEC | O_PATH, | |
c1c9193c | 1085 | .resolve = RESOLVE_NO_SYMLINKS | RESOLVE_NO_MAGICLINKS | RESOLVE_BENEATH, |
65f0afde CB |
1086 | }; |
1087 | int ret; | |
1088 | char src_buf[LXC_PROC_PID_FD_LEN], tgt_buf[LXC_PROC_PID_FD_LEN]; | |
1089 | ||
65f0afde | 1090 | if (beneath_fd < 0) |
43535b6d | 1091 | return -EINVAL; |
65f0afde CB |
1092 | |
1093 | if ((flags & MS_BIND) && src && src[0] != '/') { | |
1094 | source_fd = openat2(beneath_fd, src, &how, sizeof(how)); | |
1095 | if (source_fd < 0) | |
1096 | return -errno; | |
999f5140 CB |
1097 | ret = snprintf(src_buf, sizeof(src_buf), "/proc/self/fd/%d", source_fd); |
1098 | if (ret < 0 || ret >= sizeof(src_buf)) | |
1099 | return -EIO; | |
65f0afde CB |
1100 | } else { |
1101 | src_buf[0] = '\0'; | |
1102 | } | |
1103 | ||
1104 | target_fd = openat2(beneath_fd, dst, &how, sizeof(how)); | |
1105 | if (target_fd < 0) | |
e6d4df78 | 1106 | return log_error_errno(-errno, errno, "Failed to open %d(%s)", beneath_fd, dst); |
0dde733e CB |
1107 | ret = snprintf(tgt_buf, sizeof(tgt_buf), "/proc/self/fd/%d", target_fd); |
1108 | if (ret < 0 || ret >= sizeof(tgt_buf)) | |
1109 | return -EIO; | |
65f0afde CB |
1110 | |
1111 | if (!is_empty_string(src_buf)) | |
1112 | ret = mount(src_buf, tgt_buf, fstype, flags, data); | |
1113 | else | |
1114 | ret = mount(src, tgt_buf, fstype, flags, data); | |
1115 | ||
1116 | return ret; | |
1117 | } | |
1118 | ||
43535b6d CB |
1119 | int safe_mount_beneath(const char *beneath, const char *src, const char *dst, const char *fstype, |
1120 | unsigned int flags, const void *data) | |
1121 | { | |
1122 | __do_close int beneath_fd = -EBADF; | |
1123 | const char *path = beneath ? beneath : "/"; | |
1124 | ||
3715d0c0 | 1125 | beneath_fd = openat(-1, path, O_RDONLY | O_CLOEXEC | O_DIRECTORY | O_PATH); |
43535b6d CB |
1126 | if (beneath_fd < 0) |
1127 | return log_error_errno(-errno, errno, "Failed to open %s", path); | |
1128 | ||
1129 | return __safe_mount_beneath_at(beneath_fd, src, dst, fstype, flags, data); | |
1130 | } | |
1131 | ||
/*
 * Public entry point for mounting beneath an already opened directory fd;
 * forwards every argument unchanged to __safe_mount_beneath_at().
 */
int safe_mount_beneath_at(int beneath_fd, const char *src, const char *dst, const char *fstype,
			  unsigned int flags, const void *data)
{
	int ret;

	ret = __safe_mount_beneath_at(beneath_fd, src, dst, fstype, flags, data);

	return ret;
}
1137 | ||
592fd47a SH |
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * The target, and the source of a relative bind mount, are opened with
 * open_without_symlink() and passed to mount() as /proc/self/fd/<fd> paths.
 *
 * Returns 0 on success. On failure returns the negative value from
 * open_without_symlink(), -EINVAL if a /proc/self/fd path could not be
 * formatted, or the result of mount() with errno preserved.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	const char *mntsrc = src;
	int srcfd = -1;
	int destfd, len, ret, saved_errno;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("This is a relative bind mount");

		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		len = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (len < 0 || len >= (int)sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}

		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			/* Keep the errno of the failed open visible to the caller. */
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}

		return destfd;
	}

	len = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (len < 0 || len >= (int)sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);

		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	saved_errno = errno;

	/* The descriptors were only needed to name the mount endpoints. */
	if (srcfd != -1)
		close(srcfd);

	close(destfd);

	if (ret >= 0)
		return 0;

	errno = saved_errno;
	SYSERROR("Failed to mount \"%s\" onto \"%s\"", src ? src : "(null)", dest);
	return ret;
}
1210 | ||
ced03a01 SH |
1211 | /* |
1212 | * Mount a proc under @rootfs if proc self points to a pid other than | |
1213 | * my own. This is needed to have a known-good proc mount for setting | |
1214 | * up LSMs both at container startup and attach. | |
1215 | * | |
1216 | * @rootfs : the rootfs where proc should be mounted | |
1217 | * | |
1218 | * Returns < 0 on failure, 0 if the correct proc was already mounted | |
1219 | * and 1 if a new proc was mounted. | |
f267d666 BP |
1220 | * |
1221 | * NOTE: not to be called from inside the container namespace! | |
ced03a01 | 1222 | */ |
943144d9 | 1223 | int lxc_mount_proc_if_needed(const char *rootfs) |
ced03a01 | 1224 | { |
7be6bcd5 | 1225 | char path[PATH_MAX] = {0}; |
6b1ba5d6 | 1226 | int link_to_pid, linklen, mypid, ret; |
40464e8a | 1227 | char link[INTTYPE_TO_STRLEN(pid_t)] = {0}; |
ced03a01 | 1228 | |
d726953a CB |
1229 | ret = snprintf(path, PATH_MAX, "%s/proc/self", rootfs); |
1230 | if (ret < 0 || ret >= PATH_MAX) { | |
7be6bcd5 | 1231 | SYSERROR("The name of proc path is too long"); |
ced03a01 SH |
1232 | return -1; |
1233 | } | |
fc2ad9dc | 1234 | |
979a0d93 | 1235 | linklen = readlink(path, link, sizeof(link)); |
fc2ad9dc | 1236 | |
d726953a CB |
1237 | ret = snprintf(path, PATH_MAX, "%s/proc", rootfs); |
1238 | if (ret < 0 || ret >= PATH_MAX) { | |
7be6bcd5 | 1239 | SYSERROR("The name of proc path is too long"); |
d539a2b2 CB |
1240 | return -1; |
1241 | } | |
fc2ad9dc CB |
1242 | |
1243 | /* /proc not mounted */ | |
1244 | if (linklen < 0) { | |
1245 | if (mkdir(path, 0755) && errno != EEXIST) | |
1246 | return -1; | |
b14fc100 | 1247 | |
ced03a01 | 1248 | goto domount; |
979a0d93 | 1249 | } else if (linklen >= sizeof(link)) { |
6b1ba5d6 | 1250 | link[linklen - 1] = '\0'; |
7be6bcd5 | 1251 | ERROR("Readlink returned truncated content: \"%s\"", link); |
6b1ba5d6 | 1252 | return -1; |
fc2ad9dc CB |
1253 | } |
1254 | ||
0059379f | 1255 | mypid = lxc_raw_getpid(); |
6b1ba5d6 CB |
1256 | INFO("I am %d, /proc/self points to \"%s\"", mypid, link); |
1257 | ||
2d036cca CB |
1258 | if (lxc_safe_int(link, &link_to_pid) < 0) |
1259 | return -1; | |
fc2ad9dc | 1260 | |
6b1ba5d6 CB |
1261 | /* correct procfs is already mounted */ |
1262 | if (link_to_pid == mypid) | |
1263 | return 0; | |
fc2ad9dc | 1264 | |
6b1ba5d6 CB |
1265 | ret = umount2(path, MNT_DETACH); |
1266 | if (ret < 0) | |
7be6bcd5 | 1267 | SYSWARN("Failed to umount \"%s\" with MNT_DETACH", path); |
ced03a01 SH |
1268 | |
1269 | domount: | |
fc2ad9dc | 1270 | /* rootfs is NULL */ |
6b1ba5d6 | 1271 | if (!strcmp(rootfs, "")) |
f267d666 BP |
1272 | ret = mount("proc", path, "proc", 0, NULL); |
1273 | else | |
1274 | ret = safe_mount("proc", path, "proc", 0, NULL, rootfs); | |
f267d666 | 1275 | if (ret < 0) |
ced03a01 | 1276 | return -1; |
f267d666 | 1277 | |
7be6bcd5 | 1278 | INFO("Mounted /proc in container for security transition"); |
ced03a01 SH |
1279 | return 1; |
1280 | } | |
69aeabac | 1281 | |
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the new fd, or the negative result of open() after logging the
 * failure.
 */
int open_devnull(void)
{
	int fd;

	fd = open("/dev/null", O_RDWR);
	if (fd >= 0)
		return fd;

	SYSERROR("Can't open /dev/null");
	return fd;
}
69aeabac | 1290 | |
f8dd0275 AM |
/*
 * Make stdin, stdout and stderr duplicates of @fd.
 *
 * The three descriptors are replaced in that order; the first dup2()
 * failure aborts the sequence. @fd itself is left open.
 *
 * Returns 0 on success, -1 if @fd is negative or a dup2() call fails.
 */
int set_stdfds(int fd)
{
	static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t i;

	if (fd < 0)
		return -1;

	for (i = 0; i < sizeof(std_fds) / sizeof(std_fds[0]); i++) {
		if (dup2(fd, std_fds[i]) < 0)
			return -1;
	}

	return 0;
}
1312 | ||
/*
 * Redirect stdin, stdout and stderr to /dev/null.
 *
 * Returns -1 if /dev/null cannot be opened, otherwise the result of
 * set_stdfds(). The temporary /dev/null descriptor is closed either way.
 */
int null_stdfds(void)
{
	int fd, ret;

	fd = open_devnull();
	if (fd < 0)
		return -1;

	ret = set_stdfds(fd);
	close(fd);

	return ret;
}
ccb4cabe | 1326 | |
330ae3d3 | 1327 | /* Check whether a signal is blocked by a process. */ |
de3c491b | 1328 | /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */ |
40464e8a | 1329 | #define __PROC_STATUS_LEN (6 + INTTYPE_TO_STRLEN(pid_t) + 7 + 1) |
573ad77f | 1330 | bool task_blocks_signal(pid_t pid, int signal) |
330ae3d3 | 1331 | { |
4110345b CB |
1332 | __do_free char *line = NULL; |
1333 | __do_fclose FILE *f = NULL; | |
330ae3d3 | 1334 | int ret; |
7be6bcd5 | 1335 | char status[__PROC_STATUS_LEN] = {0}; |
573ad77f | 1336 | uint64_t sigblk = 0, one = 1; |
eabf1ea9 CB |
1337 | size_t n = 0; |
1338 | bool bret = false; | |
330ae3d3 | 1339 | |
de3c491b CB |
1340 | ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid); |
1341 | if (ret < 0 || ret >= __PROC_STATUS_LEN) | |
330ae3d3 CB |
1342 | return bret; |
1343 | ||
4110345b | 1344 | f = fopen(status, "re"); |
330ae3d3 | 1345 | if (!f) |
4110345b | 1346 | return false; |
330ae3d3 CB |
1347 | |
1348 | while (getline(&line, &n, f) != -1) { | |
573ad77f CB |
1349 | char *numstr; |
1350 | ||
eabf1ea9 | 1351 | if (strncmp(line, "SigBlk:", 7)) |
6fbcbe3b CB |
1352 | continue; |
1353 | ||
573ad77f CB |
1354 | numstr = lxc_trim_whitespace_in_place(line + 7); |
1355 | ret = lxc_safe_uint64(numstr, &sigblk, 16); | |
1356 | if (ret < 0) | |
4110345b | 1357 | return false; |
573ad77f CB |
1358 | |
1359 | break; | |
330ae3d3 CB |
1360 | } |
1361 | ||
573ad77f | 1362 | if (sigblk & (one << (signal - 1))) |
330ae3d3 CB |
1363 | bret = true; |
1364 | ||
330ae3d3 CB |
1365 | return bret; |
1366 | } | |
000dfda7 | 1367 | |
a687256f CB |
1368 | int lxc_preserve_ns(const int pid, const char *ns) |
1369 | { | |
1370 | int ret; | |
a052913d CB |
1371 | /* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */ |
1372 | #define __NS_PATH_LEN 50 | |
1373 | char path[__NS_PATH_LEN]; | |
a687256f | 1374 | |
4d8ac866 CB |
1375 | /* This way we can use this function to also check whether namespaces |
1376 | * are supported by the kernel by passing in the NULL or the empty | |
1377 | * string. | |
1378 | */ | |
a052913d | 1379 | ret = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid, |
4d8ac866 CB |
1380 | !ns || strcmp(ns, "") == 0 ? "" : "/", |
1381 | !ns || strcmp(ns, "") == 0 ? "" : ns); | |
a011ec99 CB |
1382 | if (ret < 0 || (size_t)ret >= __NS_PATH_LEN) |
1383 | return ret_errno(EIO); | |
a687256f CB |
1384 | |
1385 | return open(path, O_RDONLY | O_CLOEXEC); | |
1386 | } | |
6bc2eafe | 1387 | |
464c4611 | 1388 | bool lxc_switch_uid_gid(uid_t uid, gid_t gid) |
dbaf55a3 | 1389 | { |
db2d1af1 CB |
1390 | int ret = 0; |
1391 | ||
1392 | if (gid != LXC_INVALID_GID) { | |
2ed0ea48 | 1393 | ret = setresgid(gid, gid, gid); |
db2d1af1 CB |
1394 | if (ret < 0) { |
1395 | SYSERROR("Failed to switch to gid %d", gid); | |
464c4611 | 1396 | return false; |
db2d1af1 CB |
1397 | } |
1398 | NOTICE("Switched to gid %d", gid); | |
dbaf55a3 | 1399 | } |
dbaf55a3 | 1400 | |
db2d1af1 | 1401 | if (uid != LXC_INVALID_UID) { |
2ed0ea48 | 1402 | ret = setresuid(uid, uid, uid); |
db2d1af1 CB |
1403 | if (ret < 0) { |
1404 | SYSERROR("Failed to switch to uid %d", uid); | |
464c4611 | 1405 | return false; |
db2d1af1 CB |
1406 | } |
1407 | NOTICE("Switched to uid %d", uid); | |
dbaf55a3 | 1408 | } |
dbaf55a3 | 1409 | |
464c4611 | 1410 | return true; |
dbaf55a3 CB |
1411 | } |
1412 | ||
/*
 * Thin wrapper around setgroups() that logs the outcome uniformly.
 *
 * Returns true when the supplementary group list was replaced, false when
 * setgroups() failed.
 */
bool lxc_setgroups(int size, gid_t list[])
{
	int ret;

	ret = setgroups(size, list);
	if (ret >= 0) {
		NOTICE("Dropped additional groups");
		return true;
	}

	SYSERROR("Failed to setgroups()");
	return false;
}
c6868a1f CB |
1424 | |
1425 | static int lxc_get_unused_loop_dev_legacy(char *loop_name) | |
1426 | { | |
1427 | struct dirent *dp; | |
1428 | struct loop_info64 lo64; | |
1429 | DIR *dir; | |
1430 | int dfd = -1, fd = -1, ret = -1; | |
1431 | ||
1432 | dir = opendir("/dev"); | |
2f32e37e | 1433 | if (!dir) { |
1434 | SYSERROR("Failed to open \"/dev\""); | |
c6868a1f | 1435 | return -1; |
2f32e37e | 1436 | } |
c6868a1f CB |
1437 | |
1438 | while ((dp = readdir(dir))) { | |
c6868a1f CB |
1439 | if (strncmp(dp->d_name, "loop", 4) != 0) |
1440 | continue; | |
1441 | ||
1442 | dfd = dirfd(dir); | |
1443 | if (dfd < 0) | |
1444 | continue; | |
1445 | ||
1446 | fd = openat(dfd, dp->d_name, O_RDWR); | |
1447 | if (fd < 0) | |
1448 | continue; | |
1449 | ||
1450 | ret = ioctl(fd, LOOP_GET_STATUS64, &lo64); | |
1451 | if (ret < 0) { | |
1452 | if (ioctl(fd, LOOP_GET_STATUS64, &lo64) == 0 || | |
1453 | errno != ENXIO) { | |
1454 | close(fd); | |
1455 | fd = -1; | |
1456 | continue; | |
1457 | } | |
1458 | } | |
1459 | ||
1460 | ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name); | |
1461 | if (ret < 0 || ret >= LO_NAME_SIZE) { | |
1462 | close(fd); | |
1463 | fd = -1; | |
1464 | continue; | |
1465 | } | |
1466 | ||
1467 | break; | |
1468 | } | |
1469 | ||
1470 | closedir(dir); | |
1471 | ||
1472 | if (fd < 0) | |
1473 | return -1; | |
1474 | ||
1475 | return fd; | |
1476 | } | |
1477 | ||
1478 | static int lxc_get_unused_loop_dev(char *name_loop) | |
1479 | { | |
1480 | int loop_nr, ret; | |
1481 | int fd_ctl = -1, fd_tmp = -1; | |
1482 | ||
1483 | fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC); | |
2f32e37e | 1484 | if (fd_ctl < 0) { |
1485 | SYSERROR("Failed to open loop control"); | |
c6868a1f | 1486 | return -ENODEV; |
2f32e37e | 1487 | } |
c6868a1f CB |
1488 | |
1489 | loop_nr = ioctl(fd_ctl, LOOP_CTL_GET_FREE); | |
2f32e37e | 1490 | if (loop_nr < 0) { |
1491 | SYSERROR("Failed to get loop control"); | |
c6868a1f | 1492 | goto on_error; |
2f32e37e | 1493 | } |
c6868a1f CB |
1494 | |
1495 | ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", loop_nr); | |
1496 | if (ret < 0 || ret >= LO_NAME_SIZE) | |
1497 | goto on_error; | |
1498 | ||
1499 | fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); | |
b11738d7 | 1500 | if (fd_tmp < 0) { |
1501 | /* on Android loop devices are moved under /dev/block, give it a shot */ | |
1502 | ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/block/loop%d", loop_nr); | |
1503 | if (ret < 0 || ret >= LO_NAME_SIZE) | |
1504 | goto on_error; | |
1505 | ||
1506 | fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC); | |
1507 | if (fd_tmp < 0) | |
1508 | SYSERROR("Failed to open loop \"%s\"", name_loop); | |
1509 | } | |
c6868a1f CB |
1510 | |
1511 | on_error: | |
1512 | close(fd_ctl); | |
1513 | return fd_tmp; | |
1514 | } | |
1515 | ||
1516 | int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags) | |
1517 | { | |
1518 | int ret; | |
1519 | struct loop_info64 lo64; | |
1520 | int fd_img = -1, fret = -1, fd_loop = -1; | |
1521 | ||
1522 | fd_loop = lxc_get_unused_loop_dev(loop_dev); | |
1523 | if (fd_loop < 0) { | |
2f32e37e | 1524 | if (fd_loop != -ENODEV) |
1525 | goto on_error; | |
1526 | ||
1527 | fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev); | |
1528 | if (fd_loop < 0) | |
c6868a1f CB |
1529 | goto on_error; |
1530 | } | |
1531 | ||
1532 | fd_img = open(source, O_RDWR | O_CLOEXEC); | |
2f32e37e | 1533 | if (fd_img < 0) { |
1534 | SYSERROR("Failed to open source \"%s\"", source); | |
c6868a1f | 1535 | goto on_error; |
2f32e37e | 1536 | } |
c6868a1f CB |
1537 | |
1538 | ret = ioctl(fd_loop, LOOP_SET_FD, fd_img); | |
2f32e37e | 1539 | if (ret < 0) { |
1540 | SYSERROR("Failed to set loop fd"); | |
c6868a1f | 1541 | goto on_error; |
2f32e37e | 1542 | } |
c6868a1f CB |
1543 | |
1544 | memset(&lo64, 0, sizeof(lo64)); | |
1545 | lo64.lo_flags = flags; | |
1546 | ||
a70c9e85 JF |
1547 | strlcpy((char *)lo64.lo_file_name, source, LO_NAME_SIZE); |
1548 | ||
c6868a1f | 1549 | ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64); |
2f32e37e | 1550 | if (ret < 0) { |
1551 | SYSERROR("Failed to set loop status64"); | |
c6868a1f | 1552 | goto on_error; |
2f32e37e | 1553 | } |
c6868a1f CB |
1554 | |
1555 | fret = 0; | |
1556 | ||
1557 | on_error: | |
1558 | if (fd_img >= 0) | |
1559 | close(fd_img); | |
1560 | ||
1561 | if (fret < 0 && fd_loop >= 0) { | |
1562 | close(fd_loop); | |
1563 | fd_loop = -1; | |
1564 | } | |
1565 | ||
1566 | return fd_loop; | |
1567 | } | |
74251e49 CB |
1568 | |
/*
 * Unmount everything that is stacked on @path.
 *
 * @path: mountpoint to peel.
 * @lazy: when true, every umount2() call uses MNT_DETACH.
 *
 * umount2() is repeated until it fails. A failure with EINVAL ends the loop
 * normally; any other error is treated as fatal so that we cannot spin
 * forever. (The alternative would be constantly parsing
 * /proc/self/mountinfo, which is yucky and probably racy.)
 *
 * Returns the number of successful unmounts (saturating at INT_MAX), or
 * -errno on a fatal error.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	const int flags = lazy ? MNT_DETACH : 0;
	int count = 0;

	for (;;) {
		if (umount2(path, flags) < 0) {
			if (errno != EINVAL)
				return -errno;

			break;
		}

		/*
		 * Stop counting rather than overflow; nobody will care about
		 * the exact number at that point anyway.
		 */
		if (count != INT_MAX)
			count++;
	}

	return count;
}
ea3a694f | 1600 | |
/*
 * Run @child_fn in a child process and capture what it prints.
 *
 * @buf:         optional buffer receiving the child's combined stdout and
 *               stderr; always NUL-terminated when @buf_size > 0.
 * @buf_size:    size of @buf in bytes.
 * @child_fn:    function executed in the child; expected not to return.
 * @args:        opaque argument handed to @child_fn.
 * @wait_status: selects lxc_wait_for_pid_status() (true) or wait_for_pid()
 *               (false) for reaping the child.
 *
 * At most @buf_size - 1 bytes are collected, with a single read. One
 * trailing newline is stripped; output that does not end in a newline is
 * kept in full.
 *
 * Returns -1 if the pipe or the child cannot be created, otherwise the
 * value returned by the selected wait helper.
 */
static int run_command_internal(char *buf, size_t buf_size, int (*child_fn)(void *), void *args, bool wait_status)
{
	pid_t child;
	int ret, fret, pipefd[2];
	ssize_t bytes;

	/* Make sure our callers do not receive uninitialized memory. */
	if (buf_size > 0 && buf)
		buf[0] = '\0';

	if (pipe(pipefd) < 0) {
		SYSERROR("Failed to create pipe");
		return -1;
	}

	child = lxc_raw_clone(0, NULL);
	if (child < 0) {
		close(pipefd[0]);
		close(pipefd[1]);
		SYSERROR("Failed to create new process");
		return -1;
	}

	if (child == 0) {
		/* Close the read-end of the pipe. */
		close(pipefd[0]);

		/* Redirect std{err,out} to write-end of the
		 * pipe.
		 */
		ret = dup2(pipefd[1], STDOUT_FILENO);
		if (ret >= 0)
			ret = dup2(pipefd[1], STDERR_FILENO);

		/* Close the write-end of the pipe. */
		close(pipefd[1]);

		if (ret < 0) {
			SYSERROR("Failed to duplicate std{err,out} file descriptor");
			_exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("Failed to exec command");
		_exit(EXIT_FAILURE);
	}

	/* close the write-end of the pipe */
	close(pipefd[1]);

	if (buf && buf_size > 0) {
		bytes = lxc_read_nointr(pipefd[0], buf, buf_size - 1);
		if (bytes > 0) {
			/*
			 * Drop one trailing newline, but never a real
			 * character: bytes <= buf_size - 1, so buf[bytes] is
			 * still inside the buffer.
			 */
			if (buf[bytes - 1] == '\n')
				buf[bytes - 1] = '\0';
			else
				buf[bytes] = '\0';
		}
	}

	if (wait_status)
		fret = lxc_wait_for_pid_status(child);
	else
		fret = wait_for_pid(child);

	/* close the read-end of the pipe */
	close(pipefd[0]);

	return fret;
}
04ad7ffe | 1668 | |
/*
 * Run @child_fn in a child process, capturing its output in @buf, and reap
 * it with wait_for_pid(). See run_command_internal() for the details.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	const bool wait_status = false;

	return run_command_internal(buf, buf_size, child_fn, args, wait_status);
}
1673 | ||
/*
 * Like run_command(), but the child is reaped with
 * lxc_wait_for_pid_status() and that helper's result is returned.
 */
int run_command_status(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	const bool wait_status = true;

	return run_command_internal(buf, buf_size, child_fn, args, wait_status);
}
1678 | ||
d75c14e2 CB |
/*
 * Check whether a network interface exists.
 *
 * @nic: interface name. The special name "none" is always accepted.
 *
 * Returns true if @nic is "none" or /sys/class/net/<nic> can be stat()ed.
 * Returns false for a NULL name, for a name too long to fit the path
 * buffer, and when stat() fails.
 */
bool lxc_nic_exists(char *nic)
{
/* strlen("/sys/class/net/") + interface name + \0 */
#define __LXC_SYS_CLASS_NET_LEN (15 + IFNAMSIZ + 1)
	char path[__LXC_SYS_CLASS_NET_LEN];
	int ret;
	struct stat sb;

	if (!nic)
		return false;

	if (!strcmp(nic, "none"))
		return true;

	ret = snprintf(path, sizeof(path), "/sys/class/net/%s", nic);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return false;

	return stat(path, &sb) == 0;
}
127c6e70 | 1699 | |
6222c3f4 CB |
/*
 * Round @n up to the next power of two.
 *
 * Returns @n itself when it already is a power of two. 0 is not valid input
 * and yields 0, which is not a valid power of two. The result also wraps to
 * 0 when the next power of two does not fit into 64 bits.
 */
uint64_t lxc_find_next_power2(uint64_t n)
{
	uint64_t top = n;

	if (n == 0)
		return 0;

	/* Clear the lowest set bit until only the highest one is left. */
	while (top & (top - 1))
		top &= top - 1;

	/* Nothing was cleared: @n had a single bit set to begin with. */
	if (top == n)
		return n;

	return top << 1;
}
1fd0f41e | 1717 | |
4d8bdfa0 CB |
1718 | static int process_dead(/* takes */ int status_fd) |
1719 | { | |
f62cf1d4 | 1720 | __do_close int dupfd = -EBADF; |
4d8bdfa0 CB |
1721 | __do_free char *line = NULL; |
1722 | __do_fclose FILE *f = NULL; | |
1723 | int ret = 0; | |
1724 | size_t n = 0; | |
1725 | ||
1726 | dupfd = dup(status_fd); | |
1727 | if (dupfd < 0) | |
1728 | return -1; | |
1729 | ||
1730 | if (fd_cloexec(dupfd, true) < 0) | |
1731 | return -1; | |
1732 | ||
92bdc593 | 1733 | f = fdopen(dupfd, "re"); |
4d8bdfa0 CB |
1734 | if (!f) |
1735 | return -1; | |
4110345b CB |
1736 | |
1737 | /* Transfer ownership of fd. */ | |
92bdc593 | 1738 | move_fd(dupfd); |
4d8bdfa0 CB |
1739 | |
1740 | ret = 0; | |
1741 | while (getline(&line, &n, f) != -1) { | |
1742 | char *state; | |
1743 | ||
1744 | if (strncmp(line, "State:", 6)) | |
1745 | continue; | |
1746 | ||
1747 | state = lxc_trim_whitespace_in_place(line + 6); | |
1748 | /* only check whether process is dead or zombie for now */ | |
1749 | if (*state == 'X' || *state == 'Z') | |
1750 | ret = 1; | |
1751 | } | |
1752 | ||
1753 | return ret; | |
1754 | } | |
1755 | ||
/*
 * Arrange for @signal to be delivered to the caller when its parent dies,
 * and kill the caller right away if the parent is already gone.
 *
 * @signal:           signal passed to prctl(PR_SET_PDEATHSIG).
 * @parent:           pid the caller expects getppid() to report.
 * @parent_status_fd: fd of the parent's /proc status file, consulted when
 *                    getppid() returns 0; pass a negative value to skip
 *                    that check.
 *
 * Returns 0 on success, -1 if prctl() failed, or the result of
 * raise(SIGKILL) when the caller was found to be orphaned.
 */
int lxc_set_death_signal(int signal, pid_t parent, int parent_status_fd)
{
	int ret;
	pid_t ppid;

	ret = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0),
		    prctl_arg(0), prctl_arg(0));

	/* verify that we haven't been orphaned in the meantime */
	ppid = (pid_t)syscall(SYS_getppid);

	/* A visible parent that is not the expected one: we were reparented. */
	if (ppid != 0 && ppid != parent)
		return raise(SIGKILL);

	if (ppid == 0) { /* parent outside our pidns */
		if (parent_status_fd < 0)
			return 0;

		if (process_dead(parent_status_fd) == 1)
			return raise(SIGKILL);
	}

	return ret < 0 ? -1 : 0;
}
7ad37670 | 1781 | |
a9d4ebc1 CB |
/*
 * Set or clear the close-on-exec flag of @fd.
 *
 * @fd:      descriptor to modify.
 * @cloexec: true to set FD_CLOEXEC, false to clear it.
 *
 * F_SETFD is only issued when the flag actually has to change.
 *
 * Returns 0 on success or -errno if fcntl() fails.
 */
int fd_cloexec(int fd, bool cloexec)
{
	int cur, want;

	cur = fcntl(fd, F_GETFD, 0);
	if (cur < 0)
		return -errno;

	want = cloexec ? (cur | FD_CLOEXEC) : (cur & ~FD_CLOEXEC);

	if (want != cur && fcntl(fd, F_SETFD, want) < 0)
		return -errno;

	return 0;
}
d7ab0375 | 1803 | |
8408a9cc | 1804 | int lxc_rm_rf(const char *dirname) |
d7ab0375 | 1805 | { |
8e64b673 CB |
1806 | __do_closedir DIR *dir = NULL; |
1807 | int fret = 0; | |
d7ab0375 | 1808 | int ret; |
1809 | struct dirent *direntp; | |
d7ab0375 | 1810 | |
1811 | dir = opendir(dirname); | |
8e64b673 CB |
1812 | if (!dir) |
1813 | return log_error_errno(-1, errno, "Failed to open dir \"%s\"", dirname); | |
d7ab0375 | 1814 | |
1815 | while ((direntp = readdir(dir))) { | |
8e64b673 | 1816 | __do_free char *pathname = NULL; |
d7ab0375 | 1817 | struct stat mystat; |
1818 | ||
1819 | if (!strcmp(direntp->d_name, ".") || | |
1820 | !strcmp(direntp->d_name, "..")) | |
1821 | continue; | |
1822 | ||
1823 | pathname = must_make_path(dirname, direntp->d_name, NULL); | |
d7ab0375 | 1824 | ret = lstat(pathname, &mystat); |
1825 | if (ret < 0) { | |
8e64b673 | 1826 | if (!fret) |
7be6bcd5 | 1827 | SYSWARN("Failed to stat \"%s\"", pathname); |
d7ab0375 | 1828 | |
8e64b673 CB |
1829 | fret = -1; |
1830 | continue; | |
d7ab0375 | 1831 | } |
1832 | ||
1833 | if (!S_ISDIR(mystat.st_mode)) | |
8e64b673 | 1834 | continue; |
d7ab0375 | 1835 | |
8408a9cc | 1836 | ret = lxc_rm_rf(pathname); |
d7ab0375 | 1837 | if (ret < 0) |
8e64b673 | 1838 | fret = -1; |
d7ab0375 | 1839 | } |
1840 | ||
1841 | ret = rmdir(dirname); | |
8e64b673 CB |
1842 | if (ret < 0) |
1843 | return log_warn_errno(-1, errno, "Failed to delete \"%s\"", dirname); | |
d7ab0375 | 1844 | |
8e64b673 | 1845 | return fret; |
d7ab0375 | 1846 | } |
b25291da | 1847 | |
39293f22 CB |
1848 | bool lxc_can_use_pidfd(int pidfd) |
1849 | { | |
1850 | int ret; | |
1851 | ||
1852 | if (pidfd < 0) | |
1853 | return log_error(false, "Kernel does not support pidfds"); | |
1854 | ||
39293f22 CB |
1855 | /* |
1856 | * We don't care whether or not children were in a waitable state. We | |
1857 | * just care whether waitid() recognizes P_PIDFD. | |
1858 | * | |
1859 | * Btw, while I have your attention, the above waitid() code is an | |
1860 | * excellent example of how _not_ to do flag-based kernel APIs. So if | |
1861 | * you ever go into kernel development or are already and you add this | |
1862 | * kind of flag potpourri even though you have read this comment shame | |
1863 | * on you. May the gods of operating system development have mercy on | |
1864 | * your soul because I won't. | |
1865 | */ | |
1866 | ret = waitid(P_PIDFD, pidfd, NULL, | |
1867 | /* Type of children to wait for. */ | |
1868 | __WALL | | |
1869 | /* How to wait for them. */ | |
1870 | WNOHANG | WNOWAIT | | |
1871 | /* What state to wait for. */ | |
1872 | WEXITED | WSTOPPED | WCONTINUED); | |
1873 | if (ret < 0) | |
1874 | return log_error_errno(false, errno, "Kernel does not support waiting on processes through pidfds"); | |
1875 | ||
8ad4fa68 CB |
1876 | ret = lxc_raw_pidfd_send_signal(pidfd, 0, NULL, 0); |
1877 | if (ret) | |
1878 | return log_error_errno(false, errno, "Kernel does not support sending singals through pidfds"); | |
1879 | ||
39293f22 CB |
1880 | return log_trace(true, "Kernel supports pidfds"); |
1881 | } | |
6aff5157 | 1882 | |
c353b0b9 | 1883 | int fix_stdio_permissions(uid_t uid) |
6aff5157 | 1884 | { |
c353b0b9 CB |
1885 | __do_close int devnull_fd = -EBADF; |
1886 | int fret = 0; | |
1887 | int std_fds[] = {STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO}; | |
6aff5157 | 1888 | int ret; |
c353b0b9 | 1889 | struct stat st, st_null; |
6aff5157 | 1890 | |
1891 | devnull_fd = open_devnull(); | |
c353b0b9 | 1892 | if (devnull_fd < 0) |
a2c26bef | 1893 | return log_trace_errno(-1, errno, "Failed to open \"/dev/null\""); |
c353b0b9 CB |
1894 | |
1895 | ret = fstat(devnull_fd, &st_null); | |
1896 | if (ret) | |
a2c26bef | 1897 | return log_trace_errno(-errno, errno, "Failed to stat \"/dev/null\""); |
6aff5157 | 1898 | |
c353b0b9 | 1899 | for (int i = 0; i < ARRAY_SIZE(std_fds); i++) { |
6aff5157 | 1900 | ret = fstat(std_fds[i], &st); |
c353b0b9 CB |
1901 | if (ret) { |
1902 | SYSWARN("Failed to stat standard I/O file descriptor %d", std_fds[i]); | |
1903 | fret = -1; | |
6aff5157 | 1904 | continue; |
1905 | } | |
1906 | ||
c353b0b9 | 1907 | if (st.st_rdev == st_null.st_rdev) |
6aff5157 | 1908 | continue; |
6aff5157 | 1909 | |
1910 | ret = fchown(std_fds[i], uid, st.st_gid); | |
c353b0b9 | 1911 | if (ret) { |
a2c26bef CB |
1912 | TRACE("Failed to chown standard I/O file descriptor %d to uid %d and gid %d", |
1913 | std_fds[i], uid, st.st_gid); | |
c353b0b9 | 1914 | fret = -1; |
a2c26bef | 1915 | continue; |
6aff5157 | 1916 | } |
1917 | ||
1918 | ret = fchmod(std_fds[i], 0700); | |
c353b0b9 | 1919 | if (ret) { |
a2c26bef | 1920 | TRACE("Failed to chmod standard I/O file descriptor %d", std_fds[i]); |
c353b0b9 | 1921 | fret = -1; |
6aff5157 | 1922 | } |
1923 | } | |
1924 | ||
c353b0b9 | 1925 | return fret; |
6aff5157 | 1926 | } |
07f89c1e CB |
1927 | |
/*
 * multiply_overflow - multiply @base by @mult with overflow detection
 *
 * @base: signed multiplicand
 * @mult: unsigned multiplier
 * @res:  where the product is stored on success; untouched on failure
 *
 * Despite its name, this returns true when the multiplication did NOT
 * overflow (and *res holds the product) and false when the product would not
 * fit into an int64_t.
 *
 * All range checks are done on unsigned magnitudes. Mixing int64_t and
 * uint64_t operands directly would convert the signed side to unsigned,
 * which makes a check against INT64_MIN meaningless, and a zero @mult must
 * never be used as a divisor.
 */
bool multiply_overflow(int64_t base, uint64_t mult, int64_t *res)
{
	/* |INT64_MIN| expressed as an unsigned value (2^63). */
	const uint64_t neg_limit = (uint64_t)INT64_MAX + 1;
	uint64_t magnitude, product;

	/* A zero factor can never overflow; also avoids dividing by zero. */
	if (base == 0 || mult == 0) {
		*res = 0;
		return true;
	}

	if (base > 0) {
		if ((uint64_t)base > (uint64_t)INT64_MAX / mult)
			return false;

		*res = (int64_t)((uint64_t)base * mult);
		return true;
	}

	/* Negate in unsigned arithmetic so INT64_MIN is handled safely. */
	magnitude = (uint64_t)0 - (uint64_t)base;
	if (magnitude > neg_limit / mult)
		return false;

	product = magnitude * mult;

	/* -(int64_t)product would overflow for exactly 2^63. */
	*res = (product == neg_limit) ? INT64_MIN : -(int64_t)product;
	return true;
}