]>
Commit | Line | Data |
---|---|---|
e3642c43 DL |
1 | /* |
2 | * lxc: linux Container library | |
3 | * | |
4 | * (C) Copyright IBM Corp. 2007, 2008 | |
5 | * | |
6 | * Authors: | |
9afe19d6 | 7 | * Daniel Lezcano <daniel.lezcano at free.fr> |
e3642c43 DL |
8 | * |
9 | * This library is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU Lesser General Public | |
11 | * License as published by the Free Software Foundation; either | |
12 | * version 2.1 of the License, or (at your option) any later version. | |
13 | * | |
14 | * This library is distributed in the hope that it will be useful, | |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 | * Lesser General Public License for more details. | |
18 | * | |
19 | * You should have received a copy of the GNU Lesser General Public | |
20 | * License along with this library; if not, write to the Free Software | |
250b1eec | 21 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
e3642c43 DL |
22 | */ |
23 | ||
052616eb ÇO |
24 | #include "config.h" |
25 | ||
e3642c43 DL |
26 | #include <errno.h> |
27 | #include <unistd.h> | |
d983b93c MN |
28 | #include <stdlib.h> |
29 | #include <stddef.h> | |
61a1d519 | 30 | #include <string.h> |
e3642c43 | 31 | #include <sys/types.h> |
0cc417b2 | 32 | #include <sys/vfs.h> |
e3642c43 DL |
33 | #include <sys/stat.h> |
34 | #include <sys/mman.h> | |
d983b93c | 35 | #include <sys/param.h> |
6e4bb2e0 | 36 | #include <sys/mount.h> |
d983b93c MN |
37 | #include <dirent.h> |
38 | #include <fcntl.h> | |
1b09f2c0 | 39 | #include <libgen.h> |
9be53773 SH |
40 | #include <sys/types.h> |
41 | #include <sys/wait.h> | |
502657d5 | 42 | #include <assert.h> |
0a4be28d | 43 | #include <sys/prctl.h> |
e3642c43 | 44 | |
3ce74686 | 45 | #include "utils.h" |
e3642c43 | 46 | #include "log.h" |
025ed0f3 | 47 | #include "lxclock.h" |
51d0854c | 48 | #include "namespace.h" |
e3642c43 | 49 | |
5d6ef228 SG |
50 | #ifndef PR_SET_MM |
51 | #define PR_SET_MM 35 | |
52 | #endif | |
53 | ||
8d2ede58 TA |
54 | #ifndef PR_SET_MM_MAP |
55 | #define PR_SET_MM_MAP 14 | |
553347e4 | 56 | |
8d2ede58 TA |
/*
 * Local fallback declaration of struct prctl_mm_map, only compiled when
 * the system headers do not already define PR_SET_MM_MAP.
 * NOTE(review): the field order and widths are expected to mirror the
 * kernel's definition - confirm against the prctl(2) UAPI header.
 */
struct prctl_mm_map {
	/* address-range markers, all 64 bit wide */
	uint64_t start_code;
	uint64_t end_code;
	uint64_t start_data;
	uint64_t end_data;
	uint64_t start_brk;
	uint64_t brk;
	uint64_t start_stack;
	uint64_t arg_start;
	uint64_t arg_end;
	uint64_t env_start;
	uint64_t env_end;
	/* auxiliary vector pointer and its size */
	uint64_t *auxv;
	uint32_t auxv_size;
	uint32_t exe_fd;
};
553347e4 TA |
73 | #endif |
74 | ||
4928c718 SG |
75 | #ifndef O_PATH |
76 | #define O_PATH 010000000 | |
77 | #endif | |
78 | ||
79 | #ifndef O_NOFOLLOW | |
80 | #define O_NOFOLLOW 00400000 | |
81 | #endif | |
82 | ||
e3642c43 DL |
83 | lxc_log_define(lxc_utils, lxc); |
84 | ||
4295c5de SH |
85 | /* |
86 | * if path is btrfs, tries to remove it and any subvolumes beneath it | |
87 | */ | |
88 | extern bool btrfs_try_remove_subvol(const char *path); | |
89 | ||
0cc417b2 SH |
/*
 * Delete the contents of @dirname recursively, then @dirname itself.
 *
 * @dirname: directory to remove.
 * @pdev:    device number the removal is confined to when @onedev is set.
 * @exclude: optional entry name; at the top level (@level == 0) only, an
 *           entry with this name is removed with rmdir()/unlink() and is
 *           left in place (without counting as a failure) if it is a
 *           non-empty directory.
 * @level:   recursion depth, 0 for the initial call.
 * @onedev:  when true, entries on a device other than @pdev are not
 *           descended into; only a btrfs subvolume removal is attempted.
 *
 * Returns 0 when everything was removed, -1 if any step failed.
 */
static int _recursive_rmdir(char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	struct dirent *direntp;
	DIR *dir;
	int ret, failed = 0;
	char pathname[MAXPATHLEN];
	bool hadexclude = false;

	dir = opendir(dirname);
	if (!dir) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	/*
	 * readdir() is used instead of the deprecated readdir_r(): the
	 * latter needs a caller-sized buffer that a plain struct dirent
	 * does not guarantee to be large enough. Each invocation works on
	 * its own DIR stream.
	 */
	while ((direntp = readdir(dir))) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			continue;
		}

		/* the excluded entry is only honoured directly below the top */
		if (!level && exclude && !strcmp(direntp->d_name, exclude)) {
			ret = rmdir(pathname);
			if (ret < 0) {
				switch (errno) {
				case ENOTEMPTY:
					INFO("Not deleting snapshot %s", pathname);
					hadexclude = true;
					break;
				case ENOTDIR:
					ret = unlink(pathname);
					if (ret)
						INFO("%s: failed to remove %s", __func__, pathname);
					break;
				default:
					SYSERROR("%s: failed to rmdir %s", __func__, pathname);
					failed = 1;
					break;
				}
			}
			continue;
		}

		ret = lstat(pathname, &mystat);
		if (ret) {
			ERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			continue;
		}
		if (onedev && mystat.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(pathname))
				INFO("Removed btrfs subvolume at %s\n", pathname);
			continue;
		}
		if (S_ISDIR(mystat.st_mode)) {
			if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0)
				failed = 1;
		} else {
			if (unlink(pathname) < 0) {
				SYSERROR("%s: failed to delete %s", __func__, pathname);
				failed = 1;
			}
		}
	}

	/* a kept-back excluded entry legitimately leaves the directory non-empty */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) {
		ERROR("%s: failed to delete %s", __func__, dirname);
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		ERROR("%s: failed to close directory %s", __func__, dirname);
		failed = 1;
	}

	return failed ? -1 : 0;
}
182 | ||
0cc417b2 SH |
/* overlayfs has shipped with two different superblock magic values */
#define OVERLAYFS_SUPER_MAGIC 0x794c764f
#define OVERLAY_SUPER_MAGIC 0x794c7630

/*
 * Tell whether @path sits on an overlayfs filesystem.
 *
 * st_dev is unreliable on overlayfs, so lxc_rmdir_onedev() does not
 * restrict itself to one device there.
 *
 * Returns true when statfs() reports one of the overlayfs magic numbers,
 * false otherwise (including when statfs() itself fails).
 */
static bool is_native_overlayfs(const char *path)
{
	struct statfs fsinfo;

	if (statfs(path, &fsinfo) < 0)
		return false;

	return fsinfo.f_type == OVERLAYFS_SUPER_MAGIC ||
	       fsinfo.f_type == OVERLAY_SUPER_MAGIC;
}
201 | ||
/*
 * Recursively remove @path without crossing onto other devices (except on
 * overlayfs, where the device check is disabled).
 *
 * @exclude is forwarded to _recursive_rmdir().
 *
 * Returns 0 on success or when @path does not exist, -1 if there were
 * any failures.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat st;
	bool stay_on_dev = !is_native_overlayfs(path);

	if (lstat(path, &st) >= 0)
		return _recursive_rmdir(path, st.st_dev, exclude, 0, stay_on_dev);

	/* nothing to delete */
	if (errno == ENOENT)
		return 0;

	ERROR("%s: failed to stat %s", __func__, path);
	return -1;
}
221 | ||
/*
 * Parse @arg as an unsigned 16 bit number in the given @base
 * (borrowed from iproute2).
 *
 * On success the value is stored in *@val and 0 is returned. -1 is
 * returned for a NULL/empty string, trailing garbage, a conversion
 * error, or a value above 0xFFFF; *@val is left untouched in that case.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || *arg == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);

	if (errno != 0)
		return -1;
	/* no digits consumed, or characters left over */
	if (end == NULL || end == arg || *end != '\0')
		return -1;
	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;
	return 0;
}
240 | ||
/*
 * Create @dir and every missing parent directory, each with @mode.
 *
 * Already existing components are not an error.
 *
 * Returns 0 on success, -1 if a directory could not be created or memory
 * ran out.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* skip separators, then step over the next component */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		/* prefix of the path up to the start of that component */
		makeme = strndup(orig, dir - orig);
		if (!makeme) {
			SYSERROR("failed to allocate memory");
			return -1;
		}

		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
2a59a681 | 263 | |
44b9ae4b | 264 | char *get_rundir() |
9e60f51d | 265 | { |
97a696c6 SG |
266 | char *rundir; |
267 | const char *homedir; | |
9e60f51d | 268 | |
d6470e71 | 269 | if (geteuid() == 0) { |
c580b8d2 | 270 | rundir = strdup(RUNTIME_PATH); |
d6470e71 SG |
271 | return rundir; |
272 | } | |
97a696c6 SG |
273 | |
274 | rundir = getenv("XDG_RUNTIME_DIR"); | |
44b9ae4b SG |
275 | if (rundir) { |
276 | rundir = strdup(rundir); | |
277 | return rundir; | |
278 | } | |
97a696c6 | 279 | |
44b9ae4b SG |
280 | INFO("XDG_RUNTIME_DIR isn't set in the environment."); |
281 | homedir = getenv("HOME"); | |
282 | if (!homedir) { | |
283 | ERROR("HOME isn't set in the environment."); | |
284 | return NULL; | |
97a696c6 SG |
285 | } |
286 | ||
44b9ae4b SG |
287 | rundir = malloc(sizeof(char) * (17 + strlen(homedir))); |
288 | sprintf(rundir, "%s/.cache/lxc/run/", homedir); | |
289 | ||
9e60f51d DE |
290 | return rundir; |
291 | } | |
292 | ||
9be53773 SH |
/*
 * Wait for child @pid to terminate, retrying when interrupted by a signal.
 *
 * Returns 0 only if the child exited normally with status 0; -1 if
 * waitpid() failed or the child ended any other way.
 */
int wait_for_pid(pid_t pid)
{
	int status;
	pid_t reaped;

	for (;;) {
		reaped = waitpid(pid, &status, 0);
		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;
	return -1;
}
c797a220 CS |
310 | |
/*
 * Wait for child @pid to terminate, retrying when interrupted by a signal.
 *
 * Returns the raw wait status as filled in by waitpid() (to be decoded
 * with the W* macros), or -1 if waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;
	pid_t reaped;

	do {
		reaped = waitpid(pid, &status, 0);
		if (reaped == -1 && errno != EINTR)
			return -1;
	} while (reaped != pid);

	return status;
}
92f023dc | 326 | |
/*
 * write() wrapper that transparently restarts the call when it is
 * interrupted by a signal (EINTR).
 *
 * Returns whatever the final write() returned: the number of bytes
 * written (possibly fewer than @count) or -1 with errno set.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
336 | ||
/*
 * read() wrapper that transparently restarts the call when it is
 * interrupted by a signal (EINTR).
 *
 * Returns whatever the final read() returned: the number of bytes read,
 * 0 at end of file, or -1 with errno set.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t nread;

	do {
		nread = read(fd, buf, count);
	} while (nread < 0 && errno == EINTR);

	return nread;
}
346 | ||
/*
 * Read exactly @count bytes from @fd into @buf and optionally compare
 * them against @expected_buf.
 *
 * Returns:
 *   - the result of lxc_read_nointr() unchanged when it is <= 0,
 *   - -1 when fewer than @count bytes were read,
 *   - -1 with errno set to EINVAL when @expected_buf is given and the
 *     data differs,
 *   - otherwise the number of bytes read (== @count).
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t nread = lxc_read_nointr(fd, buf, count);

	if (nread <= 0)
		return nread;

	/* short read */
	if ((size_t)nread != count)
		return -1;

	if (!expected_buf || memcmp(buf, expected_buf, count) == 0)
		return nread;

	errno = EINVAL;
	return -1;
}
3ce74686 SH |
361 | |
362 | #if HAVE_LIBGNUTLS | |
363 | #include <gnutls/gnutls.h> | |
364 | #include <gnutls/crypto.h> | |
41246cee DE |
365 | |
/*
 * Constructor: calls gnutls_global_init() before main() runs. The
 * return value of gnutls_global_init() is not checked.
 */
__attribute__((constructor))
static void gnutls_lxc_init(void)
{
	gnutls_global_init();
}
371 | ||
3ce74686 SH |
372 | int sha1sum_file(char *fnam, unsigned char *digest) |
373 | { | |
374 | char *buf; | |
375 | int ret; | |
376 | FILE *f; | |
377 | long flen; | |
378 | ||
379 | if (!fnam) | |
380 | return -1; | |
025ed0f3 | 381 | f = fopen_cloexec(fnam, "r"); |
7be677a8 | 382 | if (!f) { |
3ce74686 SH |
383 | SYSERROR("Error opening template"); |
384 | return -1; | |
385 | } | |
386 | if (fseek(f, 0, SEEK_END) < 0) { | |
387 | SYSERROR("Error seeking to end of template"); | |
dd1d77f9 | 388 | fclose(f); |
3ce74686 SH |
389 | return -1; |
390 | } | |
391 | if ((flen = ftell(f)) < 0) { | |
392 | SYSERROR("Error telling size of template"); | |
dd1d77f9 | 393 | fclose(f); |
3ce74686 SH |
394 | return -1; |
395 | } | |
396 | if (fseek(f, 0, SEEK_SET) < 0) { | |
397 | SYSERROR("Error seeking to start of template"); | |
dd1d77f9 | 398 | fclose(f); |
3ce74686 SH |
399 | return -1; |
400 | } | |
401 | if ((buf = malloc(flen+1)) == NULL) { | |
402 | SYSERROR("Out of memory"); | |
dd1d77f9 | 403 | fclose(f); |
3ce74686 SH |
404 | return -1; |
405 | } | |
406 | if (fread(buf, 1, flen, f) != flen) { | |
407 | SYSERROR("Failure reading template"); | |
408 | free(buf); | |
dd1d77f9 | 409 | fclose(f); |
3ce74686 SH |
410 | return -1; |
411 | } | |
dd1d77f9 | 412 | if (fclose(f) < 0) { |
3ce74686 SH |
413 | SYSERROR("Failre closing template"); |
414 | free(buf); | |
415 | return -1; | |
416 | } | |
417 | buf[flen] = '\0'; | |
418 | ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest); | |
419 | free(buf); | |
420 | return ret; | |
421 | } | |
422 | #endif | |
61a1d519 CS |
423 | |
/*
 * Turn a NULL-terminated va_list of char* into a NULL-terminated argv
 * style array.
 *
 * @ap:        argument list; its last element must be NULL.
 * @skip:      number of leading slots left NULL in the result for the
 *             caller to fill in.
 * @do_strdup: when non-zero every argument is strdup()ed, otherwise the
 *             pointers are stored as they are.
 *
 * Returns a calloc()ed array or NULL when out of memory. On failure no
 * memory allocated here remains allocated.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	size_t i;
	char **result;

	/* first determine size of argument list, we don't want to reallocate
	 * constantly...
	 */
	va_copy(ap2, ap);
	while (1) {
		char* arg = va_arg(ap2, char*);
		if (!arg)
			break;
		count++;
	}
	va_end(ap2);

	result = calloc(count, sizeof(char*));
	if (!result)
		return NULL;

	count = skip;
	while (1) {
		char* arg = va_arg(ap, char*);
		if (!arg)
			break;
		arg = do_strdup ? strdup(arg) : arg;
		if (!arg)
			goto oom;
		result[count++] = arg;
	}

	/* calloc has already set last element to NULL*/
	return result;

oom:
	/* only copies made here are ours to release; slots below @skip
	 * were never written */
	if (do_strdup) {
		for (i = skip; i < count; i++)
			free(result[i]);
	}
	free(result);
	return NULL;
}
463 | ||
/*
 * Variant of lxc_va_arg_list_to_argv() that never duplicates the
 * strings: the returned array stores the argument pointers themselves.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	char **argv = lxc_va_arg_list_to_argv(ap, skip, 0);

	return (const char**)argv;
}
db27c8d7 | 468 | |
ebec9176 AM |
469 | extern struct lxc_popen_FILE *lxc_popen(const char *command) |
470 | { | |
471 | struct lxc_popen_FILE *fp = NULL; | |
472 | int parent_end = -1, child_end = -1; | |
473 | int pipe_fds[2]; | |
474 | pid_t child_pid; | |
475 | ||
476 | int r = pipe2(pipe_fds, O_CLOEXEC); | |
477 | ||
478 | if (r < 0) { | |
479 | ERROR("pipe2 failure"); | |
480 | return NULL; | |
481 | } | |
482 | ||
483 | parent_end = pipe_fds[0]; | |
484 | child_end = pipe_fds[1]; | |
485 | ||
486 | child_pid = fork(); | |
487 | ||
488 | if (child_pid == 0) { | |
489 | /* child */ | |
490 | int child_std_end = STDOUT_FILENO; | |
491 | ||
492 | if (child_end != child_std_end) { | |
493 | /* dup2() doesn't dup close-on-exec flag */ | |
494 | dup2(child_end, child_std_end); | |
495 | ||
496 | /* it's safe not to close child_end here | |
497 | * as it's marked close-on-exec anyway | |
498 | */ | |
499 | } else { | |
500 | /* | |
501 | * The descriptor is already the one we will use. | |
502 | * But it must not be marked close-on-exec. | |
503 | * Undo the effects. | |
504 | */ | |
57d2be54 SG |
505 | if (fcntl(child_end, F_SETFD, 0) != 0) { |
506 | SYSERROR("Failed to remove FD_CLOEXEC from fd."); | |
507 | exit(127); | |
508 | } | |
ebec9176 AM |
509 | } |
510 | ||
511 | /* | |
512 | * Unblock signals. | |
513 | * This is the main/only reason | |
514 | * why we do our lousy popen() emulation. | |
515 | */ | |
516 | { | |
517 | sigset_t mask; | |
518 | sigfillset(&mask); | |
519 | sigprocmask(SIG_UNBLOCK, &mask, NULL); | |
520 | } | |
521 | ||
522 | execl("/bin/sh", "sh", "-c", command, (char *) NULL); | |
523 | exit(127); | |
524 | } | |
525 | ||
526 | /* parent */ | |
527 | ||
528 | close(child_end); | |
529 | child_end = -1; | |
530 | ||
531 | if (child_pid < 0) { | |
532 | ERROR("fork failure"); | |
533 | goto error; | |
534 | } | |
535 | ||
536 | fp = calloc(1, sizeof(*fp)); | |
537 | if (!fp) { | |
538 | ERROR("failed to allocate memory"); | |
539 | goto error; | |
540 | } | |
541 | ||
542 | fp->f = fdopen(parent_end, "r"); | |
543 | if (!fp->f) { | |
544 | ERROR("fdopen failure"); | |
545 | goto error; | |
546 | } | |
547 | ||
548 | fp->child_pid = child_pid; | |
549 | ||
550 | return fp; | |
551 | ||
552 | error: | |
553 | ||
554 | if (fp) { | |
555 | if (fp->f) { | |
556 | fclose(fp->f); | |
557 | parent_end = -1; /* so we do not close it second time */ | |
558 | } | |
559 | ||
560 | free(fp); | |
561 | } | |
562 | ||
ebec9176 AM |
563 | if (parent_end != -1) |
564 | close(parent_end); | |
565 | ||
566 | return NULL; | |
567 | } | |
568 | ||
ebec9176 AM |
569 | extern int lxc_pclose(struct lxc_popen_FILE *fp) |
570 | { | |
571 | FILE *f = NULL; | |
572 | pid_t child_pid = 0; | |
573 | int wstatus = 0; | |
574 | pid_t wait_pid; | |
575 | ||
576 | if (fp) { | |
577 | f = fp->f; | |
578 | child_pid = fp->child_pid; | |
579 | /* free memory (we still need to close file stream) */ | |
580 | free(fp); | |
581 | fp = NULL; | |
582 | } | |
583 | ||
584 | if (!f || fclose(f)) { | |
585 | ERROR("fclose failure"); | |
586 | return -1; | |
587 | } | |
588 | ||
589 | do { | |
590 | wait_pid = waitpid(child_pid, &wstatus, 0); | |
591 | } while (wait_pid == -1 && errno == EINTR); | |
592 | ||
593 | if (wait_pid == -1) { | |
594 | ERROR("waitpid failure"); | |
595 | return -1; | |
596 | } | |
597 | ||
598 | return wstatus; | |
599 | } | |
600 | ||
502657d5 CS |
/*
 * Build a copy of @haystack in which every non-overlapping occurrence of
 * @needle (searched left to right) is replaced by @replacement.
 *
 * An empty @needle matches nothing, so the result is a plain copy of
 * @haystack.
 *
 * Returns a newly allocated string the caller must free(), or NULL when
 * out of memory. None of the arguments may be NULL.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t hits = 0;
	size_t result_len;
	const char *scan, *hit;
	char *result, *out;

	/* pass 1: count occurrences to size the result exactly */
	if (needle_len > 0) {
		for (scan = haystack; (hit = strstr(scan, needle)); scan = hit + needle_len)
			hits++;
	}

	result_len = strlen(haystack) - hits * needle_len + hits * replacement_len;
	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* pass 2: copy the text between matches, inserting the replacement */
	out = result;
	scan = haystack;
	if (needle_len > 0) {
		while ((hit = strstr(scan, needle))) {
			size_t part_len = (size_t)(hit - scan);

			memcpy(out, scan, part_len);
			out += part_len;
			memcpy(out, replacement, replacement_len);
			out += replacement_len;
			scan = hit + needle_len;
		}
	}
	/* remainder after the last match, including the terminating NUL */
	strcpy(out, scan);

	return result;
}
649 | ||
/*
 * Check whether @needle equals one of the strings in the NULL-terminated
 * array @haystack. A NULL @haystack contains nothing.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	size_t i;

	if (!haystack)
		return false;

	for (i = 0; haystack[i] != NULL; i++) {
		if (strcmp(needle, haystack[i]) == 0)
			return true;
	}

	return false;
}
657 | ||
/*
 * Concatenate the strings of the NULL-terminated array @parts, putting
 * @sep between consecutive elements.
 *
 * @use_as_prefix: when true the result additionally starts with @sep.
 *
 * Returns a newly allocated string the caller must free(), or NULL when
 * out of memory. @sep and @parts must not be NULL.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;
	size_t i;
	char *result, *out;

	/* calculate new string length */
	for (i = 0; parts[i]; i++)
		result_len += (i > 0 ? sep_len : 0) + strlen(parts[i]);

	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* append through a write cursor so earlier output is not rescanned */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}
	for (i = 0; parts[i]; i++) {
		size_t part_len = strlen(parts[i]);

		if (i > 0) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}
		memcpy(out, parts[i], part_len);
		out += part_len;
	}
	*out = '\0';

	return result;
}
683 | ||
/*
 * Split @path at '/' and resolve "." and ".." components lexically.
 *
 * "." is dropped, ".." removes itself and the component before it; a
 * ".." with nothing before it is simply dropped.
 *
 * Returns the NULL-terminated array of remaining components (as produced
 * by lxc_string_split()), or NULL if the split failed.
 */
char **lxc_normalize_path(const char *path)
{
	char **parts;
	size_t count = 0;
	size_t i = 0;

	parts = lxc_string_split(path, '/');
	if (!parts)
		return NULL;

	while (parts[count])
		count++;

	while (i < count) {
		bool is_dot = strcmp(parts[i], ".") == 0;
		bool is_dotdot = strcmp(parts[i], "..") == 0;
		/* entries following index i, plus the NULL terminator */
		size_t tail = count - i;

		if (is_dot || (is_dotdot && i == 0)) {
			/* eat this element */
			free(parts[i]);
			memmove(&parts[i], &parts[i + 1], sizeof(char *) * tail);
			count--;
		} else if (is_dotdot) {
			/* eat this and the previous element */
			free(parts[i - 1]);
			free(parts[i]);
			memmove(&parts[i - 1], &parts[i + 1], sizeof(char *) * tail);
			count -= 2;
			i--;
		} else {
			i++;
		}
	}

	return parts;
}
718 | ||
24b51482 CS |
/*
 * Concatenate two path fragments, inserting a '/' between them unless
 * @second already starts with one. @first is used as is.
 *
 * Returns a newly allocated string the caller must free(), or NULL when
 * out of memory.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	bool need_slash = second[0] != '/';
	size_t size = strlen(first) + strlen(second) + 1;
	char *joined;

	if (need_slash)
		size += 1;

	joined = calloc(1, size);
	if (!joined)
		return NULL;

	if (need_slash)
		snprintf(joined, size, "%s/%s", first, second);
	else
		snprintf(joined, size, "%s%s", first, second);

	return joined;
}
737 | ||
502657d5 CS |
/*
 * Check whether @needle is one of the non-empty tokens of @haystack,
 * where tokens are separated by (runs of) the character @_sep.
 *
 * The list is scanned in place, so no copy of @haystack is made.
 * Returns false when either string is NULL.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	char sep[2] = { _sep, '\0' };
	size_t needle_len;
	const char *cur;

	if (!haystack || !needle)
		return false;

	needle_len = strlen(needle);
	cur = haystack;
	for (;;) {
		size_t token_len;

		/* skip separators in front of the next token */
		cur += strspn(cur, sep);
		if (*cur == '\0')
			break;

		token_len = strcspn(cur, sep);
		if (token_len == needle_len && memcmp(cur, needle, token_len) == 0)
			return true;
		cur += token_len;
	}

	return false;
}
755 | ||
/*
 * Split @string into its non-empty tokens separated by (runs of) @_sep.
 *
 * Returns a NULL-terminated array of newly allocated strings, to be
 * released with lxc_free_array(array, free). A NULL or token-less
 * @string yields an array holding only the NULL terminator. On
 * allocation failure NULL is returned with errno preserved.
 */
char **lxc_string_split(const char *string, char _sep)
{
	char sep[2] = { _sep, '\0' };
	char **result = NULL;
	char **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	const char *cur;
	int r, saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	/* scan the input in place instead of copying it onto the stack */
	for (cur = string;;) {
		size_t token_len;

		cur += strspn(cur, sep);
		if (*cur == '\0')
			break;
		token_len = strcspn(cur, sep);

		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strndup(cur, token_len);
		if (!result[result_count])
			goto error_out;
		result_count++;
		cur += token_len;
	}

	/* no token found: realloc(NULL, ...) would hand back uninitialised
	 * memory, so build the empty, terminated array explicitly */
	if (!result)
		return calloc(1, sizeof(char *));

	/* if we allocated too much, reduce it; a failed shrink leaves the
	 * larger (still valid) array in place */
	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	return shrunk ? shrunk : result;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
788 | ||
/*
 * Like lxc_string_split(), but leading and trailing spaces and tabs are
 * stripped from every token. A token consisting only of blanks is kept
 * as an empty string.
 *
 * Returns a NULL-terminated array of newly allocated strings, to be
 * released with lxc_free_array(array, free). A NULL or token-less
 * @string yields an array holding only the NULL terminator. On
 * allocation failure NULL is returned with errno preserved.
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char sep[2] = { _sep, '\0' };
	char **result = NULL;
	char **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	const char *cur;
	int r, saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	/* scan the input in place instead of copying it onto the stack */
	for (cur = string;;) {
		const char *start, *end;

		cur += strspn(cur, sep);
		if (*cur == '\0')
			break;
		start = cur;
		end = cur + strcspn(cur, sep);
		cur = end;

		/* trim blanks on both sides of the token */
		while (start < end && (*start == ' ' || *start == '\t'))
			start++;
		while (end > start && (end[-1] == ' ' || end[-1] == '\t'))
			end--;

		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strndup(start, (size_t)(end - start));
		if (!result[result_count])
			goto error_out;
		result_count++;
	}

	/* no token found: realloc(NULL, ...) would hand back uninitialised
	 * memory, so build the empty, terminated array explicitly */
	if (!result)
		return calloc(1, sizeof(char *));

	/* if we allocated too much, reduce it; a failed shrink leaves the
	 * larger (still valid) array in place */
	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	return shrunk ? shrunk : result;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
829 | ||
830 | void lxc_free_array(void **array, lxc_free_fn element_free_fn) | |
831 | { | |
832 | void **p; | |
833 | for (p = array; p && *p; p++) | |
834 | element_free_fn(*p); | |
835 | free((void*)array); | |
836 | } | |
837 | ||
/*
 * Make sure *@array has room for @new_size elements plus a terminating
 * NULL, enlarging it in steps of @capacity_increment if necessary.
 *
 * @array:    address of the array pointer; may point to NULL initially.
 * @capacity: address of the current capacity in elements; updated.
 * Newly added slots are zeroed.
 *
 * Returns 0 on success. Returns -1 if memory could not be allocated, or
 * (with errno set to EINVAL) if growth is needed but
 * @capacity_increment is 0; *@array and *@capacity then keep their
 * current values.
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	new_capacity = *capacity;
	if (new_size + 1 <= new_capacity)
		return 0; /* array has sufficient elements */

	/* a zero step could never reach the requested size */
	if (capacity_increment == 0) {
		errno = EINVAL;
		return -1;
	}

	while (new_size + 1 > new_capacity)
		new_capacity += capacity_increment;

	new_array = realloc(*array, new_capacity * sizeof(void *));
	if (!new_array)
		return -1;
	memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
	*array = new_array;
	*capacity = new_capacity;

	return 0;
}
866 | ||
/*
 * Count the elements of a NULL-terminated pointer array (the terminator
 * is not counted). A NULL @array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t n = 0;

	if (!array)
		return 0;

	while (array[n])
		n++;

	return n;
}
877 | ||
0e95426b CS |
/*
 * Replace the contents of @filename (created with mode 0666 if missing,
 * truncated otherwise) with the @count bytes at @buf, optionally
 * followed by a newline.
 *
 * Returns 0 on success, -1 if the file could not be opened or a write
 * failed or was short; errno from the failing call is preserved across
 * the close().
 */
int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
{
	int fd, saved_errno;
	ssize_t written;
	bool ok;

	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
	if (fd < 0)
		return -1;

	written = lxc_write_nointr(fd, buf, count);
	ok = written >= 0 && (size_t)written == count;

	if (ok && add_newline)
		ok = lxc_write_nointr(fd, "\n", 1) == 1;

	if (ok) {
		close(fd);
		return 0;
	}

	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
905 | ||
/*
 * Read from @filename.
 *
 * With a non-NULL @buf and non-zero @count, @buf is zeroed and filled by
 * a single read() of at most @count bytes. Otherwise nothing is stored
 * and the file is read through to the end just to measure it.
 *
 * Returns the number of bytes read (or the measured size), or -1 if the
 * file could not be opened or a read failed.
 */
int lxc_read_from_file(const char *filename, void* buf, size_t count)
{
	int fd, saved_errno;
	ssize_t nread;

	fd = open(filename, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -1;

	if (buf && count) {
		memset(buf, 0, count);
		nread = read(fd, buf, count);
	} else {
		/* size-only mode: drain the file through a scratch buffer */
		char scratch[100];
		size_t total = 0;

		while ((nread = read(fd, scratch, 100)) > 0)
			total += nread;
		if (nread >= 0)
			nread = total;
	}

	if (nread < 0)
		ERROR("read %s: %s", filename, strerror(errno));

	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return nread;
}
799f29ab ÇO |
935 | |
/*
 * Append a terminating NULL to @array, which currently holds @count
 * elements.
 *
 * With @count == 0 the array is returned untouched. Otherwise it is
 * enlarged by one slot; if that fails, all @count elements and the array
 * itself are freed and NULL is returned.
 *
 * Returns the (possibly moved) array, or NULL on allocation failure.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **temp;
	size_t i;

	/* Append NULL to the array */
	if (count) {
		temp = realloc(array, (count + 1) * sizeof(*array));
		if (!temp) {
			/* size_t index: matches the type of @count */
			for (i = 0; i < count; i++)
				free(array[i]);
			free(array);
			return NULL;
		}
		array = temp;
		array[count] = NULL;
	}
	return array;
}
508c263e SH |
955 | |
/*
 * Produce a seed for rand(): taken from /dev/urandom when it can be
 * read, with time + pid as the fallback value.
 *
 * @srand_it: when true the seed is also installed with srand().
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	/* fallback value, overwritten below when /dev/urandom is readable */
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom;

	urandom = fopen("/dev/urandom", "r");
	if (urandom) {
		int ret = fread(&seed, sizeof(seed), 1, urandom);

		if (ret != 1)
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
5d897655 SH |
977 | |
/*
 * Translate @orig through /proc/self/uid_map.
 *
 * Each map line is parsed as "<nsid> <hostid> <range>"; the first line
 * whose [hostid, hostid + range) interval contains @orig determines the
 * result nsid + (orig - hostid).
 *
 * Returns the translated id, or 0 when the map cannot be opened or no
 * line matches.
 */
uid_t get_ns_uid(uid_t orig)
{
	char *line = NULL;
	size_t line_cap = 0;
	uid_t nsid, hostid, range;
	uid_t mapped = 0;
	FILE *map;

	map = fopen("/proc/self/uid_map", "r");
	if (!map)
		return 0;

	while (getline(&line, &line_cap, map) != -1) {
		if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
			continue;
		/* @orig outside of this mapping's range */
		if (orig < hostid || hostid + range <= orig)
			continue;

		mapped = nsid + (orig - hostid);
		break;
	}

	fclose(map);
	free(line);
	return mapped;
}
c476bdce SH |
1002 | |
/*
 * Report whether @path names a directory (symlinks are followed, since
 * stat() is used). Any stat() failure, whatever its cause, counts as
 * "not a directory".
 */
bool dir_exists(const char *path)
{
	struct stat st;

	if (stat(path, &st) < 0)
		return false;

	return S_ISDIR(st.st_mode);
}
93c379f0 ÇO |
1014 | |
1015 | /* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS. | |
1016 | * FNV has good anti collision properties and we're not worried | |
1017 | * about pre-image resistance or one-way-ness, we're just trying to make | |
1018 | * the name unique in the 108 bytes of space we have. | |
1019 | */ | |
/*
 * Fold the @len bytes at @buf into the running hash @hval: for every
 * byte the hash is XORed with the byte and then multiplied (mod 2^64) by
 * 0x100000001b3, the value the shift-and-add form
 * 1 + 2^1 + 2^4 + 2^5 + 2^7 + 2^8 + 2^40 spells out.
 *
 * Returns the updated hash; with @len == 0 that is @hval unchanged.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];
		/* multiply by the 64 bit FNV magic prime mod 2^64 */
		hval *= 0x100000001b3ULL;
	}

	return hval;
}
2c6f3fc9 SH |
1038 | |
/*
 * Detect whether / is mounted MS_SHARED. The only known way to check
 * that is through /proc/self/mountinfo.
 *
 * Only '/' is examined. If the container rootfs or mount location is
 * MS_SHARED but is not '/', it goes undetected - figuring that out would
 * be too much work to be worth it.
 *
 * Returns 1 when a mountinfo line for "/" contains "shared:" after its
 * mount-options field, 0 otherwise (including when mountinfo cannot be
 * opened).
 */
#define LINELEN 4096
int detect_shared_rootfs(void)
{
	char line[LINELEN];
	char *mnt, *mnt_end, *rest;
	FILE *f;
	int nspaces;
	int shared = 0;

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		return 0;

	while (fgets(line, LINELEN, f)) {
		/* the mount point is the word following the 4th space */
		mnt = line;
		for (nspaces = 0; mnt && nspaces < 4; nspaces++)
			mnt = strchr(mnt + 1, ' ');
		if (!mnt)
			continue;

		mnt_end = strchr(mnt + 1, ' ');
		if (!mnt_end)
			continue;
		*mnt_end = '\0';

		if (strcmp(mnt + 1, "/") != 0)
			continue;

		/* this is '/': skip one more word, then look for the tag */
		rest = strchr(mnt_end + 1, ' ');
		if (rest && strstr(rest, "shared:")) {
			shared = 1;
			break;
		}
	}

	fclose(f);
	return shared;
}
0e6e3a41 | 1078 | |
51d0854c DY |
/*
 * Join namespace @ns (the name of an entry below /proc/<pid>/ns/) of
 * process @pid via setns().
 *
 * Returns true on success; false if the path does not fit into the
 * buffer, the namespace file cannot be opened, or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns)
{
	char nspath[MAXPATHLEN];
	bool switched;
	int len, nsfd;

	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	/* report the failure before close() gets a chance to touch errno */
	switched = (setns(nsfd, 0) == 0);
	if (!switched)
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, nsfd);

	close(nsfd);
	return switched;
}
1103 | ||
b7f954bb SH |
1104 | /* |
1105 | * looking at fs/proc_namespace.c, it appears we can | |
1106 | * actually expect the rootfs entry to very specifically contain | |
1107 | * " - rootfs rootfs " | |
1108 | * IIUC, so long as we've chrooted so that rootfs is not our root, | |
1109 | * the rootfs entry should always be skipped in mountinfo contents. | |
1110 | */ | |
1111 | int detect_ramfs_rootfs(void) | |
1112 | { | |
1113 | char buf[LINELEN], *p; | |
1114 | FILE *f; | |
1115 | int i; | |
1116 | char *p2; | |
1117 | ||
1118 | f = fopen("/proc/self/mountinfo", "r"); | |
1119 | if (!f) | |
1120 | return 0; | |
1121 | while (fgets(buf, LINELEN, f)) { | |
1122 | for (p = buf, i=0; p && i < 4; i++) | |
1123 | p = strchr(p+1, ' '); | |
1124 | if (!p) | |
1125 | continue; | |
1126 | p2 = strchr(p+1, ' '); | |
1127 | if (!p2) | |
1128 | continue; | |
1129 | *p2 = '\0'; | |
1130 | if (strcmp(p+1, "/") == 0) { | |
1131 | // this is '/'. is it the ramfs? | |
1132 | p = strchr(p2+1, '-'); | |
1133 | if (p && strncmp(p, "- rootfs rootfs ", 16) == 0) { | |
1134 | fclose(f); | |
1135 | return 1; | |
1136 | } | |
1137 | } | |
1138 | } | |
1139 | fclose(f); | |
1140 | return 0; | |
1141 | } | |
1142 | ||
/*
 * Look for an executable named @cmd in the directories listed in $PATH.
 * If @rootfs is non-NULL every candidate is checked below that directory.
 *
 * Returns a newly allocated string holding the first candidate that
 * passes access(X_OK) - the caller frees it - or NULL when $PATH is
 * unset, an allocation fails, or no candidate is executable. Candidates
 * that do not fit into MAXPATHLEN are skipped.
 */
char *on_path(char *cmd, const char *rootfs)
{
	char candidate[MAXPATHLEN];
	char *dirs, *dir;
	char *state = NULL;
	char *found = NULL;
	const char *env;
	int n;

	env = getenv("PATH");
	if (!env)
		return NULL;

	/* strtok_r() modifies its input, so work on a private copy */
	dirs = strdup(env);
	if (!dirs)
		return NULL;

	for (dir = strtok_r(dirs, ":", &state); dir;
	     dir = strtok_r(NULL, ":", &state)) {
		if (rootfs)
			n = snprintf(candidate, MAXPATHLEN, "%s/%s/%s", rootfs, dir, cmd);
		else
			n = snprintf(candidate, MAXPATHLEN, "%s/%s", dir, cmd);

		if (n < 0 || n >= MAXPATHLEN)
			continue;

		if (access(candidate, X_OK) == 0) {
			found = strdup(candidate);
			break;
		}
	}

	free(dirs);
	return found;
}
76a26f55 SH |
1180 | |
/*
 * Report whether stat() succeeds on @f, i.e. whether something of any
 * file type can be reached at that path.
 */
bool file_exists(const char *f)
{
	struct stat sb;
	int rc;

	rc = stat(f, &sb);
	return rc == 0;
}
9d9c111c | 1187 | |
12983ba4 SH |
/*
 * Report whether /proc/self/ns/cgroup can be stat()ed, which is taken
 * as the sign that cgroup namespaces are available.
 */
bool cgns_supported(void)
{
	struct stat sb;

	return stat("/proc/self/ns/cgroup", &sb) == 0;
}
1192 | ||
9d9c111c SH |
1193 | /* historically lxc-init has been under /usr/lib/lxc and under |
1194 | * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc. | |
1195 | */ | |
1196 | char *choose_init(const char *rootfs) | |
1197 | { | |
1198 | char *retv = NULL; | |
370ec268 SF |
1199 | const char *empty = "", |
1200 | *tmp; | |
9d9c111c SH |
1201 | int ret, env_set = 0; |
1202 | struct stat mystat; | |
1203 | ||
1204 | if (!getenv("PATH")) { | |
1205 | if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0)) | |
1206 | SYSERROR("Failed to setenv"); | |
1207 | env_set = 1; | |
1208 | } | |
1209 | ||
1210 | retv = on_path("init.lxc", rootfs); | |
1211 | ||
1212 | if (env_set) { | |
1213 | if (unsetenv("PATH")) | |
1214 | SYSERROR("Failed to unsetenv"); | |
1215 | } | |
1216 | ||
1217 | if (retv) | |
1218 | return retv; | |
1219 | ||
1220 | retv = malloc(PATH_MAX); | |
1221 | if (!retv) | |
1222 | return NULL; | |
1223 | ||
1224 | if (rootfs) | |
370ec268 | 1225 | tmp = rootfs; |
9d9c111c | 1226 | else |
370ec268 SF |
1227 | tmp = empty; |
1228 | ||
1229 | ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc"); | |
9d9c111c SH |
1230 | if (ret < 0 || ret >= PATH_MAX) { |
1231 | ERROR("pathname too long"); | |
1232 | goto out1; | |
1233 | } | |
1234 | ||
1235 | ret = stat(retv, &mystat); | |
1236 | if (ret == 0) | |
1237 | return retv; | |
1238 | ||
370ec268 | 1239 | ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init"); |
9d9c111c SH |
1240 | if (ret < 0 || ret >= PATH_MAX) { |
1241 | ERROR("pathname too long"); | |
1242 | goto out1; | |
1243 | } | |
1244 | ||
1245 | ret = stat(retv, &mystat); | |
1246 | if (ret == 0) | |
1247 | return retv; | |
1248 | ||
370ec268 | 1249 | ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp); |
9d9c111c SH |
1250 | if (ret < 0 || ret >= PATH_MAX) { |
1251 | ERROR("pathname too long"); | |
1252 | goto out1; | |
1253 | } | |
1254 | ret = stat(retv, &mystat); | |
1255 | if (ret == 0) | |
1256 | return retv; | |
1257 | ||
370ec268 | 1258 | ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp); |
9d9c111c SH |
1259 | if (ret < 0 || ret >= PATH_MAX) { |
1260 | ERROR("pathname too long"); | |
1261 | goto out1; | |
1262 | } | |
1263 | ret = stat(retv, &mystat); | |
1264 | if (ret == 0) | |
1265 | return retv; | |
1266 | ||
1267 | /* | |
1268 | * Last resort, look for the statically compiled init.lxc which we | |
1269 | * hopefully bind-mounted in. | |
1270 | * If we are called during container setup, and we get to this point, | |
1271 | * then the init.lxc.static from the host will need to be bind-mounted | |
1272 | * in. So we return NULL here to indicate that. | |
1273 | */ | |
1274 | if (rootfs) | |
1275 | goto out1; | |
1276 | ||
1277 | ret = snprintf(retv, PATH_MAX, "/init.lxc.static"); | |
1278 | if (ret < 0 || ret >= PATH_MAX) { | |
1279 | WARN("Nonsense - name /lxc.init.static too long"); | |
1280 | goto out1; | |
1281 | } | |
1282 | ret = stat(retv, &mystat); | |
1283 | if (ret == 0) | |
1284 | return retv; | |
1285 | ||
1286 | out1: | |
1287 | free(retv); | |
1288 | return NULL; | |
1289 | } | |
735f2c6e TA |
1290 | |
/*
 * Write the string @content to @file, truncating or creating it.
 *
 * Returns 0 when the whole string was written and the stream closed
 * cleanly, -1 otherwise.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int written;
	int ret = 0;

	f = fopen(file, "w");
	if (!f)
		return -1;

	written = fprintf(f, "%s", content);
	if (written < 0 || (size_t)written != strlen(content))
		ret = -1;

	/*
	 * stdio buffers the data, so a failing write may only show up when
	 * fclose() flushes it; treat that as a failure as well.
	 */
	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
e1daebd9 SH |
1304 | |
/*
 * Return 1 if @path can be stat()ed and is a directory, 0 otherwise.
 */
int is_dir(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) ? 1 : 0;
}
6010a416 SG |
1313 | |
1314 | /* | |
1315 | * Given the '-t' template option to lxc-create, figure out what to | |
1316 | * do. If the template is a full executable path, use that. If it | |
1317 | * is something like 'sshd', then return $templatepath/lxc-sshd. | |
1318 | * On success return the template, on error return NULL. | |
1319 | */ | |
1320 | char *get_template_path(const char *t) | |
1321 | { | |
1322 | int ret, len; | |
1323 | char *tpath; | |
1324 | ||
1325 | if (t[0] == '/' && access(t, X_OK) == 0) { | |
1326 | tpath = strdup(t); | |
1327 | return tpath; | |
1328 | } | |
1329 | ||
1330 | len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1; | |
1331 | tpath = malloc(len); | |
1332 | if (!tpath) | |
1333 | return NULL; | |
1334 | ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t); | |
1335 | if (ret < 0 || ret >= len) { | |
1336 | free(tpath); | |
1337 | return NULL; | |
1338 | } | |
1339 | if (access(tpath, X_OK) < 0) { | |
1340 | SYSERROR("bad template: %s", t); | |
1341 | free(tpath); | |
1342 | return NULL; | |
1343 | } | |
1344 | ||
1345 | return tpath; | |
1346 | } | |
0a4be28d TA |
1347 | |
/*
 * Starting at @pos, step to the next ' ' character @count times.
 * Returns the final position, or NULL if @pos was NULL or the string ran
 * out of spaces first.
 */
static char *proctitle_skip_spaces(char *pos, int count)
{
	while (pos && count > 0) {
		pos = strchr(pos + 1, ' ');
		count--;
	}
	return pos;
}

/*
 * Sets the process title to the specified title.
 *
 * The current memory layout is read from /proc/self/stat and handed back
 * to the kernel through prctl(PR_SET_MM, PR_SET_MM_MAP) with a changed
 * argument area; @title is copied there once the prctl has succeeded.
 *
 * Returns -1 if /proc/self/stat cannot be read or parsed or if memory
 * runs out; otherwise the return value of prctl().
 *
 * Notes kept from the original author:
 * 1. this function requires root to succeed
 * 2. it clears /proc/self/environ
 * 3. it may not succeed (e.g. if title is longer than /proc/self/environ +
 *    the original title)
 */
int setproctitle(char *title)
{
	static char *proctitle = NULL;
	char statline[2048], *cursor;
	FILE *statf;
	int nparsed, len, ret = 0;

	/* We don't really need to know all of this stuff, but unfortunately
	 * PR_SET_MM_MAP requires us to set it all at once, so we have to
	 * figure it out anyway.
	 */
	unsigned long start_data, end_data, start_brk, start_code, end_code,
			start_stack, arg_start, arg_end, env_start, env_end,
			brk_val;
	struct prctl_mm_map prctl_map;

	statf = fopen_cloexec("/proc/self/stat", "r");
	if (!statf)
		return -1;

	cursor = fgets(statline, sizeof(statline), statf);
	fclose(statf);
	if (!cursor)
		return -1;

	/* Skip the first 25 fields, column 26-28 are start_code, end_code,
	 * and start_stack */
	cursor = proctitle_skip_spaces(strchr(statline, ' '), 24);
	if (!cursor)
		return -1;

	nparsed = sscanf(cursor, "%lu %lu %lu", &start_code, &end_code, &start_stack);
	if (nparsed != 3)
		return -1;

	/* Skip the next 19 fields, column 45-51 are start_data to env_end */
	cursor = proctitle_skip_spaces(cursor, 19);
	if (!cursor)
		return -1;

	nparsed = sscanf(cursor, "%lu %lu %lu %lu %lu %lu %lu",
		&start_data,
		&end_data,
		&start_brk,
		&arg_start,
		&arg_end,
		&env_start,
		&env_end);
	if (nparsed != 7)
		return -1;

	/* Include the null byte here, because in the calculations below we
	 * want to have room for it. */
	len = strlen(title) + 1;

	/* If we don't have enough room by just overwriting the old proctitle,
	 * let's allocate a new one.
	 */
	if (len > arg_end - arg_start) {
		void *grown;

		grown = realloc(proctitle, len);
		if (!grown)
			return -1;
		proctitle = grown;

		arg_start = (unsigned long) proctitle;
	}

	arg_end = arg_start + len;

	brk_val = syscall(__NR_brk, 0);

	prctl_map = (struct prctl_mm_map) {
		.start_code = start_code,
		.end_code = end_code,
		.start_stack = start_stack,
		.start_data = start_data,
		.end_data = end_data,
		.start_brk = start_brk,
		.brk = brk_val,
		.arg_start = arg_start,
		.arg_end = arg_end,
		.env_start = env_start,
		.env_end = env_end,
		.auxv = NULL,
		.auxv_size = 0,
		.exe_fd = -1,
	};

	ret = prctl(PR_SET_MM, PR_SET_MM_MAP, (long) &prctl_map, sizeof(prctl_map), 0);
	if (ret != 0) {
		INFO("setting cmdline failed - %s", strerror(errno));
		return ret;
	}

	/* the kernel accepted the new argument area, so fill it in */
	strcpy((char*)arg_start, title);
	return ret;
}
ced03a01 | 1464 | |
592fd47a SH |
/*
 * @path: a pathname where / replaced with '\0'.
 * @offsetp: pointer to int showing which path segment was last seen.
 *           Updated on return to reflect the next segment. It is left
 *           untouched when it already is at or past @fulllen.
 * @fulllen: full original path length.
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int pos = *offsetp;

	if (pos >= fulllen)
		return NULL;

	/* run to the end of the segment we are currently in ... */
	for (; pos < fulllen && path[pos] != '\0'; pos++)
		;
	/* ... then across the separator(s) that follow it */
	for (; pos < fulllen && path[pos] == '\0'; pos++)
		;

	*offsetp = pos;
	if (pos >= fulllen)
		return NULL;

	return path + pos;
}
1487 | ||
/*
 * Check that @subdir is @dir itself or lies below it. @len is the
 * length of @dir (so it need not be recalculated) and must be non-zero.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	if (subdirlen < len || strncmp(subdir, dir, len) != 0)
		return false;

	/* a @dir ending in '/' already ends on a component boundary */
	if (dir[len - 1] == '/')
		return true;

	/* otherwise the match must stop at the end or at a '/' of @subdir */
	return subdirlen == len || subdir[len] == '/';
}
1506 | ||
/*
 * Check if the open fd is a symlink. Return -ELOOP if it is. Return
 * -ENOENT if we couldn't fstat. Return 0 if the fd is ok.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
1521 | ||
/*
 * Open a file or directory, provided that it contains no symlinks.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @nextpath is opened relative to @dirfd with O_NOFOLLOW. Returns the new
 * fd, -1 with errno set if openat() failed, or the negative value from
 * check_symlink() if the O_PATH fallback turned out to be a symlink.
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int fd, rc;

	fd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);

	/* either it opened (so it was no symlink) or it is a symlink */
	if (fd >= 0 || errno == ELOOP)
		return fd;

	if (errno != EPERM && errno != EACCES)
		return fd;

	/* we're not root (cause we got EPERM) so try opening with O_PATH */
	fd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (fd < 0)
		return fd;

	/* O_PATH will return an fd for symlinks. We know nextpath wasn't
	 * a symlink at last openat, so if fd is now a link, then something
	 * fishy is going on
	 */
	rc = check_symlink(fd);
	if (rc < 0) {
		close(fd);
		return rc;
	}

	return fd;
}
1556 | ||
/*
 * Open a path intending for mounting, ensuring that the final path
 * is inside the container's rootfs.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target: path to be opened
 * @prefix_skip: a part of @target in which to ignore symbolic links. This
 *               would be the container's rootfs. NULL or "" means "/".
 *
 * The prefix is opened directly; every remaining component of @target is
 * then opened one at a time through open_if_safe().
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int offset, fd, targetlen;
	char *segments, *c, *end;

	targetlen = strlen(target);

	/* make sure prefix-skip makes sense */
	if (!prefix_skip || prefix_skip[0] == '\0') {
		prefix_skip = "/";
		offset = 0;
	} else {
		offset = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, offset)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
				target, prefix_skip);
			return -EINVAL;
		}
		/*
		 * get_nextpath() expects its offset argument to be on a
		 * (turned into \0) / or before it, so step back by one.
		 * The prefix is non-empty here, so offset is at least 1.
		 */
		offset--;
	}

	/* Make a copy of target which we can hack up, and tokenize it */
	segments = strdup(target);
	if (!segments) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}
	end = segments + targetlen;
	for (c = segments; c < end; c++) {
		if (*c == '/')
			*c = '\0';
	}

	fd = open(prefix_skip, O_RDONLY);
	while (fd >= 0) {
		char *component;
		int nextfd, saved_errno;

		component = get_nextpath(segments, &offset, targetlen);
		if (!component)
			break;

		/* descend one level; the parent fd is no longer needed */
		nextfd = open_if_safe(fd, component);
		saved_errno = errno;
		close(fd);
		fd = nextfd;
		if (nextfd < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);
		}
	}

	free(segments);
	return fd;
}
1632 | ||
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @dest (and @src, for a bind mount from a relative path) is opened
 * through open_without_symlink() and the mount is done on the resulting
 * /proc/self/fd/<fd> names.
 *
 * Returns 0 on success. On failure returns the negative value from
 * open_without_symlink(), -EINVAL if an fd path could not be formatted,
 * or the result of mount() with errno preserved.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
		unsigned long flags, const void *data, const char *rootfs)
{
	int srcfd = -1, destfd, ret, saved_errno;
	char srcbuf[50], destbuf[50]; /* only needs enough for /proc/self/fd/<fd> */
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("this is a relative bind mount");
		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		/* a return value equal to the buffer size is truncation too */
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}
		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);
		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	/* keep mount()'s errno across the close() calls for the log below */
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);
	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		/* @src may legitimately be NULL; never hand NULL to %s */
		SYSERROR("Failed to mount %s onto %s", src ? src : "(null)", dest);
		return ret;
	}

	return 0;
}
1698 | ||
ced03a01 SH |
/*
 * Mount a proc under @rootfs if proc self points to a pid other than
 * my own. This is needed to have a known-good proc mount for setting
 * up LSMs both at container startup and attach.
 *
 * @rootfs : the rootfs where proc should be mounted; NULL is treated
 *           like "" (the host's /)
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 *
 * NOTE: not to be called from inside the container namespace!
 */
int mount_proc_if_needed(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;
	int mypid;

	if (!rootfs)
		rootfs = "";

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/*
	 * readlink() does not NUL-terminate. Read at most one byte less
	 * than the zeroed buffer holds so that link is always a valid
	 * string for the %s and atoi() uses below.
	 */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	mypid = (int)getpid();
	INFO("I am %d, /proc/self points to '%s'", mypid, link);

	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;
	if (atoi(link) != mypid) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (!strcmp(rootfs, "")) /* no rootfs: mount directly on the host path */
		ret = mount("proc", path, "proc", 0, NULL);
	else
		ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);

	if (ret < 0)
		return -1;

	INFO("Mounted /proc in container for security transition");
	return 1;
}
69aeabac | 1754 | |
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the new fd; on failure the error is logged and the negative
 * result of open() is returned.
 */
int open_devnull(void)
{
	int nullfd;

	nullfd = open("/dev/null", O_RDWR);
	if (nullfd >= 0)
		return nullfd;

	SYSERROR("Can't open /dev/null");
	return nullfd;
}
69aeabac | 1764 | |
f8dd0275 AM |
/*
 * Make fds 0, 1 and 2 duplicates of @fd.
 *
 * Returns 0 on success, -1 if @fd is negative or a dup2() fails. @fd
 * itself is not closed.
 */
int set_stdfds(int fd)
{
	int stdfd;

	if (fd < 0)
		return -1;

	for (stdfd = 0; stdfd <= 2; stdfd++) {
		if (dup2(fd, stdfd) < 0)
			return -1;
	}

	return 0;
}
1779 | ||
/*
 * Redirect fds 0, 1 and 2 to /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null could not be
 * opened. The temporary /dev/null fd is closed again.
 */
int null_stdfds(void)
{
	int ret, nullfd;

	nullfd = open_devnull();
	if (nullfd < 0)
		return -1;

	ret = set_stdfds(nullfd);
	close(nullfd);

	return ret;
}
ccb4cabe SH |
1792 | |
/*
 * Return the number of lines in file @fn, or -1 if it cannot be opened.
 * A line is whatever one successful getline() call returns.
 */
int lxc_count_file_lines(const char *fn)
{
	FILE *fp;
	char *buf = NULL;
	size_t cap = 0;
	int count;

	fp = fopen_cloexec(fn, "r");
	if (!fp)
		return -1;

	for (count = 0; getline(&buf, &cap, fp) != -1; count++)
		;

	free(buf);
	fclose(fp);
	return count;
}