]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
Merge pull request #1810 from brauner/2017-09-12/start_move_env_setup
[mirror_lxc.git] / src / lxc / utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config.h"
25
26 #define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
27 #include <ctype.h>
28 #include <dirent.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <grp.h>
32 #include <inttypes.h>
33 #include <libgen.h>
34 #include <stddef.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <sys/mman.h>
40 #include <sys/mount.h>
41 #include <sys/param.h>
42 #include <sys/prctl.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <sys/wait.h>
46
47 #include "log.h"
48 #include "lxclock.h"
49 #include "namespace.h"
50 #include "utils.h"
51
52 #ifndef PR_SET_MM
53 #define PR_SET_MM 35
54 #endif
55
56 #ifndef PR_SET_MM_MAP
57 #define PR_SET_MM_MAP 14
58
/*
 * Local fallback declaration, only compiled when the system headers do not
 * provide PR_SET_MM_MAP (see the surrounding #ifndef).
 * NOTE(review): presumably mirrors the kernel's struct prctl_mm_map layout
 * field for field - confirm against <linux/prctl.h> before changing it.
 */
struct prctl_mm_map {
	uint64_t start_code;	/* code segment bounds */
	uint64_t end_code;
	uint64_t start_data;	/* data segment bounds */
	uint64_t end_data;
	uint64_t start_brk;	/* heap start and current break */
	uint64_t brk;
	uint64_t start_stack;
	uint64_t arg_start;	/* argument area bounds */
	uint64_t arg_end;
	uint64_t env_start;	/* environment area bounds */
	uint64_t env_end;
	uint64_t *auxv;		/* auxiliary vector and its size */
	uint32_t auxv_size;
	uint32_t exe_fd;
};
75 #endif
76
77 #ifndef O_PATH
78 #define O_PATH 010000000
79 #endif
80
81 #ifndef O_NOFOLLOW
82 #define O_NOFOLLOW 00400000
83 #endif
84
85 lxc_log_define(lxc_utils, lxc);
86
87 /*
88 * if path is btrfs, tries to remove it and any subvolumes beneath it
89 */
90 extern bool btrfs_try_remove_subvol(const char *path);
91
/*
 * Remove everything below dirname and then dirname itself.
 *
 * pdev    - device number of the top-level directory; with onedev set,
 *           entries living on another device are not descended into (only a
 *           btrfs subvolume removal is attempted for them).
 * exclude - name of a top-level (level == 0) entry that gets special
 *           handling: it is only removed if rmdir()/unlink() succeeds on it
 *           directly, and a non-empty one is left in place.
 * level   - recursion depth, 0 for the initial call.
 *
 * Returns 0 when nothing failed, -1 otherwise. Processing continues after
 * individual failures so that as much as possible gets removed.
 */
static int _recursive_rmdir(char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	char child[MAXPATHLEN];
	struct dirent *entry;
	DIR *stream;
	bool kept_exclude = false;
	int errors = 0;

	stream = opendir(dirname);
	if (stream == NULL) {
		ERROR("failed to open %s", dirname);
		return -1;
	}

	for (entry = readdir(stream); entry != NULL; entry = readdir(stream)) {
		const char *name = entry->d_name;
		struct stat st;
		int n;

		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
			continue;

		n = snprintf(child, MAXPATHLEN, "%s/%s", dirname, name);
		if (n < 0 || n >= MAXPATHLEN) {
			ERROR("pathname too long");
			errors = 1;
			continue;
		}

		/* The excluded top-level entry is never descended into. */
		if (level == 0 && exclude != NULL && strcmp(name, exclude) == 0) {
			if (rmdir(child) < 0) {
				if (errno == ENOTEMPTY) {
					INFO("Not deleting snapshot %s", child);
					kept_exclude = true;
				} else if (errno == ENOTDIR) {
					if (unlink(child)) {
						INFO("Failed to remove %s", child);
					}
				} else {
					SYSERROR("Failed to rmdir %s", child);
					errors = 1;
				}
			}
			continue;
		}

		if (lstat(child, &st)) {
			ERROR("Failed to stat %s", child);
			errors = 1;
			continue;
		}

		if (onedev && st.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(child)) {
				INFO("Removed btrfs subvolume at %s\n", child);
			}
			continue;
		}

		if (!S_ISDIR(st.st_mode)) {
			if (unlink(child) < 0) {
				SYSERROR("Failed to delete %s", child);
				errors = 1;
			}
			continue;
		}

		if (_recursive_rmdir(child, pdev, exclude, level + 1, onedev) < 0)
			errors = 1;
	}

	/* A directory that still holds the excluded entry may legitimately
	 * survive; anything else that cannot be removed is an error. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !kept_exclude) {
		ERROR("Failed to delete %s", dirname);
		errors = 1;
	}

	if (closedir(stream)) {
		ERROR("Failed to close directory %s", dirname);
		errors = 1;
	}

	return errors ? -1 : 0;
}
184
185 /* We have two different magic values for overlayfs, yay. */
186 #ifndef OVERLAYFS_SUPER_MAGIC
187 #define OVERLAYFS_SUPER_MAGIC 0x794c764f
188 #endif
189
190 #ifndef OVERLAY_SUPER_MAGIC
191 #define OVERLAY_SUPER_MAGIC 0x794c7630
192 #endif
193
194 /* In overlayfs, st_dev is unreliable. So on overlayfs we don't do the
195 * lxc_rmdir_onedev()
196 */
197 static bool is_native_overlayfs(const char *path)
198 {
199 if (has_fs_type(path, OVERLAY_SUPER_MAGIC) ||
200 has_fs_type(path, OVERLAYFS_SUPER_MAGIC))
201 return true;
202
203 return false;
204 }
205
/*
 * Recursively delete path without crossing onto other devices (the device
 * check is switched off when path is on overlayfs). A top-level entry named
 * exclude is given the special treatment implemented by _recursive_rmdir().
 *
 * Returns 0 on success or when path does not exist, -1 if there were any
 * failures.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat st;
	bool same_device_only = !is_native_overlayfs(path);

	if (lstat(path, &st) >= 0)
		return _recursive_rmdir(path, st.st_dev, exclude, 0, same_device_only);

	if (errno == ENOENT)
		return 0;

	ERROR("Failed to stat %s", path);
	return -1;
}
225
/* borrowed from iproute2
 *
 * Parse arg as an unsigned number in the given base and store it in *val.
 * The whole string must be consumed and the value must fit into 16 bits.
 * Returns 0 on success; on any failure returns -1 and leaves *val untouched.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);

	/* conversion error or overflow reported by strtoul() */
	if (errno != 0)
		return -1;

	/* nothing parsed, or trailing garbage */
	if (end == NULL || end == arg || *end != '\0')
		return -1;

	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;
	return 0;
}
244
/*
 * Create dir and all of its missing parent directories with the given mode
 * (like "mkdir -p"). Already existing components are not an error.
 *
 * Returns 0 on success, -1 if a component could not be created or if memory
 * for the temporary path prefix could not be allocated.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* dir: start of the next component, tmp: its end */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		/* everything in front of the current component */
		makeme = strndup(orig, dir - orig);
		if (!makeme)
			return -1;

		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while(tmp != dir);

	return 0;
}
267
268 char *get_rundir()
269 {
270 char *rundir;
271 const char *homedir;
272
273 if (geteuid() == 0) {
274 rundir = strdup(RUNTIME_PATH);
275 return rundir;
276 }
277
278 rundir = getenv("XDG_RUNTIME_DIR");
279 if (rundir) {
280 rundir = strdup(rundir);
281 return rundir;
282 }
283
284 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
285 homedir = getenv("HOME");
286 if (!homedir) {
287 ERROR("HOME isn't set in the environment.");
288 return NULL;
289 }
290
291 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
292 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
293
294 return rundir;
295 }
296
/*
 * Wait for child pid to terminate, restarting the wait when it is
 * interrupted by a signal.
 *
 * Returns 0 only if the child exited normally with status 0; returns -1 if
 * waiting failed, or the child was killed or exited with a non-zero status.
 */
int wait_for_pid(pid_t pid)
{
	int wstatus;

	for (;;) {
		int reaped = waitpid(pid, &wstatus, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}

		if (reaped == pid)
			break;
	}

	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
		return 0;

	return -1;
}
314
/*
 * Wait for child pid to terminate, restarting the wait when it is
 * interrupted by a signal.
 *
 * Returns the raw wait status (to be inspected with the W* macros), or -1
 * if waitpid() failed for a reason other than EINTR.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int wstatus;

	for (;;) {
		int reaped = waitpid(pid, &wstatus, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}

		if (reaped == pid)
			return wstatus;
	}
}
330
/*
 * write() wrapper that retries for as long as the call is interrupted by a
 * signal (EINTR). Returns whatever the final write() returned, so short
 * writes are still possible and left to the caller.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
340
/*
 * read() wrapper that retries for as long as the call is interrupted by a
 * signal (EINTR). Returns whatever the final read() returned: the number of
 * bytes read, 0 at end of file, or -1 on error.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t got;

	do {
		got = read(fd, buf, count);
	} while (got < 0 && errno == EINTR);

	return got;
}
350
/*
 * Read exactly count bytes from fd into buf and, when expected_buf is not
 * NULL, verify that they equal expected_buf.
 *
 * Returns count on success. A read result <= 0 is passed through unchanged,
 * a short read yields -1, and a content mismatch yields -1 with errno set
 * to EINVAL.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t got = lxc_read_nointr(fd, buf, count);

	if (got <= 0)
		return got;

	if ((size_t)got != count)
		return -1;

	if (expected_buf != NULL && memcmp(buf, expected_buf, count) != 0) {
		errno = EINVAL;
		return -1;
	}

	return got;
}
365
366 #if HAVE_LIBGNUTLS
367 #include <gnutls/gnutls.h>
368 #include <gnutls/crypto.h>
369
/* Marked as a constructor so that gnutls_global_init() is invoked
 * automatically at load time, without any explicit call from this file. */
__attribute__((constructor))
static void gnutls_lxc_init(void)
{
	gnutls_global_init();
}
375
376 int sha1sum_file(char *fnam, unsigned char *digest)
377 {
378 char *buf;
379 int ret;
380 FILE *f;
381 long flen;
382
383 if (!fnam)
384 return -1;
385 f = fopen_cloexec(fnam, "r");
386 if (!f) {
387 SYSERROR("Error opening template");
388 return -1;
389 }
390 if (fseek(f, 0, SEEK_END) < 0) {
391 SYSERROR("Error seeking to end of template");
392 fclose(f);
393 return -1;
394 }
395 if ((flen = ftell(f)) < 0) {
396 SYSERROR("Error telling size of template");
397 fclose(f);
398 return -1;
399 }
400 if (fseek(f, 0, SEEK_SET) < 0) {
401 SYSERROR("Error seeking to start of template");
402 fclose(f);
403 return -1;
404 }
405 if ((buf = malloc(flen+1)) == NULL) {
406 SYSERROR("Out of memory");
407 fclose(f);
408 return -1;
409 }
410 if (fread(buf, 1, flen, f) != flen) {
411 SYSERROR("Failure reading template");
412 free(buf);
413 fclose(f);
414 return -1;
415 }
416 if (fclose(f) < 0) {
417 SYSERROR("Failre closing template");
418 free(buf);
419 return -1;
420 }
421 buf[flen] = '\0';
422 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
423 free(buf);
424 return ret;
425 }
426 #endif
427
/*
 * Turn a NULL-terminated list of char * varargs into a NULL-terminated
 * argv-style array.
 *
 * skip      - number of leading slots to leave empty (NULL) for the caller
 *             to fill in; the varargs start at index skip.
 * do_strdup - when non-zero every argument is duplicated with strdup(),
 *             otherwise the original pointers are stored.
 *
 * Returns the newly allocated array, or NULL when out of memory. On failure
 * nothing is leaked: duplicated strings stored so far are freed as well.
 * ap is consumed by this function; the caller still has to va_end() it.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	size_t i;
	char **result;

	/* first determine size of argument list, we don't want to reallocate
	 * constantly...
	 */
	va_copy(ap2, ap);
	while (va_arg(ap2, char *))
		count++;
	va_end(ap2);

	/* calloc leaves the skipped slots and the terminator set to NULL */
	result = calloc(count, sizeof(char *));
	if (!result)
		return NULL;

	for (i = skip; ; i++) {
		char *arg = va_arg(ap, char *);

		if (!arg)
			break;

		if (do_strdup) {
			arg = strdup(arg);
			if (!arg)
				goto oom;
		}

		result[i] = arg;
	}

	return result;

oom:
	/* We only get here with do_strdup set, so every slot in
	 * [skip, i) holds a copy owned by us. */
	while (i > skip)
		free(result[--i]);
	free(result);
	return NULL;
}
467
/*
 * Variant of lxc_va_arg_list_to_argv() that never duplicates the arguments:
 * the returned array stores the caller's own pointers, so only the array
 * itself has to be freed.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	char **argv = lxc_va_arg_list_to_argv(ap, skip, 0);

	return (const char **)argv;
}
472
473 struct lxc_popen_FILE *lxc_popen(const char *command)
474 {
475 int ret;
476 int pipe_fds[2];
477 pid_t child_pid;
478 struct lxc_popen_FILE *fp = NULL;
479
480 ret = pipe2(pipe_fds, O_CLOEXEC);
481 if (ret < 0)
482 return NULL;
483
484 child_pid = fork();
485 if (child_pid < 0)
486 goto on_error;
487
488 if (!child_pid) {
489 sigset_t mask;
490
491 close(pipe_fds[0]);
492
493 /* duplicate stdout */
494 if (pipe_fds[1] != STDOUT_FILENO)
495 ret = dup2(pipe_fds[1], STDOUT_FILENO);
496 else
497 ret = fcntl(pipe_fds[1], F_SETFD, 0);
498 if (ret < 0) {
499 close(pipe_fds[1]);
500 exit(EXIT_FAILURE);
501 }
502
503 /* duplicate stderr */
504 if (pipe_fds[1] != STDERR_FILENO)
505 ret = dup2(pipe_fds[1], STDERR_FILENO);
506 else
507 ret = fcntl(pipe_fds[1], F_SETFD, 0);
508 close(pipe_fds[1]);
509 if (ret < 0)
510 exit(EXIT_FAILURE);
511
512 /* unblock all signals */
513 ret = sigfillset(&mask);
514 if (ret < 0)
515 exit(EXIT_FAILURE);
516
517 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
518 if (ret < 0)
519 exit(EXIT_FAILURE);
520
521 execl("/bin/sh", "sh", "-c", command, (char *)NULL);
522 exit(127);
523 }
524
525 close(pipe_fds[1]);
526 pipe_fds[1] = -1;
527
528 fp = malloc(sizeof(*fp));
529 if (!fp)
530 goto on_error;
531
532 fp->child_pid = child_pid;
533 fp->pipe = pipe_fds[0];
534
535 fp->f = fdopen(pipe_fds[0], "r");
536 if (!fp->f)
537 goto on_error;
538
539 return fp;
540
541 on_error:
542 if (fp)
543 free(fp);
544
545 if (pipe_fds[0] >= 0)
546 close(pipe_fds[0]);
547
548 if (pipe_fds[1] >= 0)
549 close(pipe_fds[1]);
550
551 return NULL;
552 }
553
554 int lxc_pclose(struct lxc_popen_FILE *fp)
555 {
556 pid_t wait_pid;
557 int wstatus = 0;
558
559 if (!fp)
560 return -1;
561
562 do {
563 wait_pid = waitpid(fp->child_pid, &wstatus, 0);
564 } while (wait_pid < 0 && errno == EINTR);
565
566 close(fp->pipe);
567 fclose(fp->f);
568 free(fp);
569
570 if (wait_pid < 0)
571 return -1;
572
573 return wstatus;
574 }
575
/*
 * Return a newly allocated copy of haystack in which every non-overlapping
 * occurrence of needle (searched left to right) is replaced by replacement.
 *
 * An empty needle matches nothing, so the result is then a plain copy of
 * haystack. Returns NULL when out of memory; the caller frees the result.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t hits = 0;
	size_t result_len;
	const char *cur, *hit;
	char *result, *out;

	/* First pass: count the matches so the result can be sized exactly.
	 * An empty needle would match at every position without ever
	 * advancing, so it is treated as "no match". */
	if (needle_len > 0) {
		for (cur = haystack; (hit = strstr(cur, needle)); cur = hit + needle_len)
			hits++;
	}

	result_len = strlen(haystack) - hits * needle_len + hits * replacement_len;
	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Second pass: copy the text between matches and the replacements. */
	out = result;
	cur = haystack;
	while (hits-- > 0) {
		size_t gap;

		hit = strstr(cur, needle);
		gap = (size_t)(hit - cur);

		memcpy(out, cur, gap);
		out += gap;
		memcpy(out, replacement, replacement_len);
		out += replacement_len;

		cur = hit + needle_len;
	}

	/* remainder after the last match, including the terminating NUL */
	strcpy(out, cur);

	return result;
}
630
/*
 * Return true if needle equals one of the strings in the NULL-terminated
 * array haystack. A NULL haystack is treated as an empty array.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	size_t i;

	if (haystack == NULL)
		return false;

	for (i = 0; haystack[i] != NULL; i++) {
		if (strcmp(needle, haystack[i]) == 0)
			return true;
	}

	return false;
}
638
/*
 * Concatenate the strings of the NULL-terminated array parts, separated by
 * sep. With use_as_prefix set, sep is additionally placed in front of the
 * result (e.g. to build an absolute path from its components).
 *
 * A NULL parts pointer is treated as an empty array. Returns a newly
 * allocated string the caller must free, or NULL when out of memory.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;
	size_t i;
	char *result, *out;

	/* calculate new string length */
	for (i = 0; parts && parts[i]; i++) {
		if (i > 0)
			result_len += sep_len;
		result_len += strlen(parts[i]);
	}

	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Append through a moving cursor rather than strcat(), which would
	 * rescan the growing result for every part. */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}

	for (i = 0; parts && parts[i]; i++) {
		size_t part_len = strlen(parts[i]);

		if (i > 0) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}

		memcpy(out, parts[i], part_len);
		out += part_len;
	}
	*out = '\0';

	return result;
}
664
/*
 * Split path at '/' and resolve "." and ".." components lexically:
 *  - "." is dropped,
 *  - ".." drops itself together with the preceding component,
 *  - a ".." with nothing in front of it is simply dropped.
 *
 * Returns the remaining components as a NULL-terminated array of allocated
 * strings, or NULL if splitting failed.
 */
char **lxc_normalize_path(const char *path)
{
	char **parts;
	size_t count = 0;
	size_t i = 0;

	parts = lxc_string_split(path, '/');
	if (parts == NULL)
		return NULL;

	while (parts[count] != NULL)
		count++;

	while (i < count) {
		const bool is_dot = strcmp(parts[i], ".") == 0;
		const bool is_dotdot = strcmp(parts[i], "..") == 0;
		/* number of pointers behind index i, terminator included */
		const size_t tail = count - i;

		if (!is_dot && !is_dotdot) {
			i++;
			continue;
		}

		if (is_dot || i == 0) {
			/* eat this element */
			free(parts[i]);
			memmove(&parts[i], &parts[i + 1], sizeof(char *) * tail);
			count--;
			continue;
		}

		/* eat this and the previous element */
		free(parts[i - 1]);
		free(parts[i]);
		memmove(&parts[i - 1], &parts[i + 1], sizeof(char *) * tail);
		count -= 2;
		i--;
	}

	return parts;
}
699
/*
 * Return a newly allocated, normalized version of path: components are
 * resolved by lxc_normalize_path() and joined with single '/' characters; a
 * leading '/' in the input is kept.
 *
 * When no component is left after normalization:
 *  - an empty input is returned as an empty string,
 *  - a non-empty input that contains no '/' at all yields "/",
 *  - anything else goes through the regular join below.
 *
 * Returns NULL when out of memory.
 */
char *lxc_deslashify(const char *path)
{
	char *copy, *joined;
	char **parts;

	copy = strdup(path);
	if (copy == NULL)
		return NULL;

	parts = lxc_normalize_path(copy);
	if (parts == NULL) {
		free(copy);
		return NULL;
	}

	if (parts[0] == NULL) {
		size_t len = strlen(copy);

		if (len == 0) {
			lxc_free_array((void **)parts, free);
			return copy;
		}

		if (strcspn(copy, "/") == len) {
			free(copy);
			lxc_free_array((void **)parts, free);
			return strdup("/");
		}
	}

	joined = lxc_string_join("/", (const char **)parts, *copy == '/');
	free(copy);
	lxc_free_array((void **)parts, free);
	return joined;
}
741
/*
 * Concatenate two path fragments into a newly allocated string. A '/' is
 * inserted between them unless second already starts with one; first is
 * used as is, so existing trailing slashes are kept.
 *
 * Returns NULL when out of memory; the caller frees the result.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	const bool need_slash = second[0] != '/';
	size_t size = strlen(first) + strlen(second) + 1;
	char *joined;

	if (need_slash)
		size += 1;

	joined = calloc(1, size);
	if (joined == NULL)
		return NULL;

	if (need_slash)
		snprintf(joined, size, "%s/%s", first, second);
	else
		snprintf(joined, size, "%s%s", first, second);

	return joined;
}
760
/*
 * Return true if needle is one of the tokens of haystack, where tokens are
 * separated by the character _sep. Runs of separators are treated as one,
 * so empty tokens never exist and an empty needle never matches.
 *
 * Returns false if needle or haystack is NULL.
 *
 * haystack is scanned in place; no copy is made, so its length is not
 * limited by the available stack space.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	const char sep[2] = { _sep, '\0' };
	const char *cur;
	size_t needle_len;

	if (!haystack || !needle)
		return false;

	needle_len = strlen(needle);
	cur = haystack;

	for (;;) {
		size_t token_len;

		/* skip separators in front of the next token */
		cur += strspn(cur, sep);
		if (*cur == '\0')
			break;

		token_len = strcspn(cur, sep);
		if (token_len == needle_len && memcmp(cur, needle, token_len) == 0)
			return true;

		cur += token_len;
	}

	return false;
}
778
/*
 * Split string at every occurrence of _sep. Empty tokens are skipped, as
 * strtok_r() does.
 *
 * Returns a NULL-terminated array of newly allocated strings (an array
 * holding only the terminator for a NULL or token-less input). On failure
 * NULL is returned, with errno preserved from the failing call. Free the
 * result with lxc_free_array(result, free).
 */
char **lxc_string_split(const char *string, char _sep)
{
	char delim[2] = {_sep, '\0'};
	char **list = NULL, **shrunk;
	size_t capacity = 0;
	size_t used = 0;
	char *copy, *token, *state = NULL;
	int saved_errno;

	if (string == NULL)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so work on a private copy */
	copy = alloca(strlen(string) + 1);
	strcpy(copy, string);

	token = strtok_r(copy, delim, &state);
	while (token != NULL) {
		if (lxc_grow_array((void ***)&list, &capacity, used + 1, 16) < 0)
			goto error_out;

		list[used] = strdup(token);
		if (list[used] == NULL)
			goto error_out;
		used++;

		token = strtok_r(NULL, delim, &state);
	}

	/* if we allocated too much, reduce it */
	shrunk = realloc(list, (used + 1) * sizeof(char *));
	if (shrunk == NULL)
		goto error_out;

	/* Make sure we don't return uninitialized memory. */
	if (used == 0)
		shrunk[0] = NULL;

	return shrunk;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)list, free);
	errno = saved_errno;
	return NULL;
}
818
/*
 * Like lxc_string_split(), but additionally strips leading and trailing
 * spaces and tabs from every token.
 *
 * Returns a NULL-terminated array of newly allocated strings (an array
 * holding only the terminator for a NULL or token-less input). On failure
 * NULL is returned, with errno preserved from the failing call. Free the
 * result with lxc_free_array(result, free).
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char *token, *str, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **tmp = NULL, **result = NULL;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;
	size_t i = 0;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so work on a private copy */
	str = alloca(strlen(string)+1);
	strcpy(str, string);
	for (; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		/* trim leading whitespace ... */
		while (token[0] == ' ' || token[0] == '\t')
			token++;

		/* ... and trailing whitespace */
		i = strlen(token);
		while (i > 0 && (token[i - 1] == ' ' || token[i - 1] == '\t')) {
			token[i - 1] = '\0';
			i--;
		}

		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}

	/* if we allocated too much, reduce it; on failure the array is
	 * released through error_out instead of being leaked */
	tmp = realloc(result, (result_count + 1) * sizeof(char *));
	if (!tmp)
		goto error_out;
	result = tmp;

	/* Without any token the array was never grown, so realloc() handed
	 * out fresh, uninitialized memory: terminate it explicitly. */
	if (result_count == 0)
		*result = NULL;

	return result;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
859
860 void lxc_free_array(void **array, lxc_free_fn element_free_fn)
861 {
862 void **p;
863 for (p = array; p && *p; p++)
864 element_free_fn(*p);
865 free((void*)array);
866 }
867
/*
 * Make sure *array can hold new_size elements plus a terminating NULL.
 *
 * The capacity grows in steps of capacity_increment; newly added slots are
 * zeroed so the array stays NULL-terminated. *array and *capacity are only
 * updated when a reallocation actually succeeded.
 *
 * Returns 0 if the array is large enough, -1 if reallocation failed or if
 * growth is required but capacity_increment is 0 (errno is set to EINVAL in
 * that case, where the growth loop could never terminate).
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	new_capacity = *capacity;
	if (new_size + 1 > new_capacity && capacity_increment == 0) {
		errno = EINVAL;
		return -1;
	}

	while (new_size + 1 > new_capacity)
		new_capacity += capacity_increment;

	if (new_capacity != *capacity) {
		/* we have to reallocate */
		new_array = realloc(*array, new_capacity * sizeof(void *));
		if (!new_array)
			return -1;

		memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
		*array = new_array;
		*capacity = new_capacity;
	}

	/* array has sufficient elements */
	return 0;
}
896
/*
 * Count the elements of a NULL-terminated pointer array, not including the
 * terminator. A NULL array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t len = 0;

	if (array == NULL)
		return 0;

	while (array[len] != NULL)
		len++;

	return len;
}
907
/*
 * Write count bytes from buf to filename, creating the file (mode 0666
 * before umask) or truncating it first. With add_newline set a single '\n'
 * is written after the data.
 *
 * Returns 0 on success. Returns -1 if the file cannot be opened or if a
 * write fails or is short; errno is then the value left by the failing
 * call.
 */
int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
{
	int fd, saved_errno;
	ssize_t written;
	bool ok;

	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
	if (fd < 0)
		return -1;

	written = lxc_write_nointr(fd, buf, count);
	ok = written >= 0 && (size_t)written == count;

	if (ok && add_newline)
		ok = lxc_write_nointr(fd, "\n", 1) == 1;

	if (ok) {
		close(fd);
		return 0;
	}

	/* keep the errno of the failed write, not the one of close() */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
935
/*
 * Read from filename.
 *
 * With a buffer (buf != NULL and count != 0) the buffer is zeroed and a
 * single read() of at most count bytes is made into it. Without one the
 * file is read to the end in chunks and only its size is determined.
 *
 * Returns the number of bytes read (or the total size in the second mode),
 * or -1 if the file could not be opened or reading failed.
 */
int lxc_read_from_file(const char *filename, void* buf, size_t count)
{
	int fd, saved_errno;
	ssize_t ret;

	fd = open(filename, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -1;

	if (buf != NULL && count != 0) {
		memset(buf, 0, count);
		ret = read(fd, buf, count);
	} else {
		char scratch[100];
		size_t total = 0;

		while ((ret = read(fd, scratch, 100)) > 0)
			total += ret;

		if (ret >= 0)
			ret = total;
	}

	if (ret < 0) {
		ERROR("read %s: %s", filename, strerror(errno));
	}

	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return ret;
}
965
/*
 * Grow array, which holds count elements, by one slot and store a
 * terminating NULL there.
 *
 * With count == 0 the array is returned untouched. If the reallocation
 * fails, all count elements and the array itself are freed and NULL is
 * returned.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **grown;
	size_t i;

	if (count == 0)
		return array;

	grown = realloc(array, (count + 1) * sizeof(*array));
	if (grown != NULL) {
		grown[count] = NULL;
		return grown;
	}

	for (i = 0; i < count; i++)
		free(array[i]);
	free(array);

	return NULL;
}
985
/*
 * Produce a seed for the C random number generator: one unsigned int read
 * from /dev/urandom, falling back to "current time + pid" when the device
 * cannot be opened or read.
 *
 * With srand_it set the seed is also passed to srand(). The seed is
 * returned either way.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom = fopen("/dev/urandom", "r");

	if (urandom != NULL) {
		if (fread(&seed, sizeof(seed), 1, urandom) != 1) {
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		}
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
1007
/*
 * Translate orig through /proc/self/uid_map: find the first map line whose
 * second-column range [hostid, hostid + range) contains orig and return the
 * corresponding id from the first column's range.
 *
 * Returns 0 if the map cannot be opened or no line covers orig.
 */
uid_t get_ns_uid(uid_t orig)
{
	uid_t nsid, hostid, range;
	uid_t mapped = 0;
	char *line = NULL;
	size_t cap = 0;
	FILE *map;

	map = fopen("/proc/self/uid_map", "r");
	if (map == NULL)
		return 0;

	while (getline(&line, &cap, map) != -1) {
		if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
			continue;

		if (hostid <= orig && hostid + range > orig) {
			mapped = nsid + (orig - hostid);
			break;
		}
	}

	fclose(map);
	free(line);
	return mapped;
}
1032
/*
 * Return true if path exists and is a directory (symlinks are followed).
 * Any stat() failure, whatever its reason, counts as "no".
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode);
}
1044
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 *
 * Fold the len bytes at buf into the running 64 bit hash hval: each byte is
 * xored into the hash, which is then multiplied by the 64 bit FNV prime.
 * Returns the updated hash; with len == 0 hval is returned unchanged.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64;
		 * 0x100000001b3 = 2^40 + 2^8 + 0xb3 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
1068
1069 /*
1070 * Detect whether / is mounted MS_SHARED. The only way I know of to
1071 * check that is through /proc/self/mountinfo.
1072 * I'm only checking for /. If the container rootfs or mount location
1073 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1074 * out would be too much work to be worth it.
1075 */
1076 int detect_shared_rootfs(void)
1077 {
1078 char buf[LXC_LINELEN], *p;
1079 FILE *f;
1080 int i;
1081 char *p2;
1082
1083 f = fopen("/proc/self/mountinfo", "r");
1084 if (!f)
1085 return 0;
1086 while (fgets(buf, LXC_LINELEN, f)) {
1087 for (p = buf, i = 0; p && i < 4; i++)
1088 p = strchr(p + 1, ' ');
1089 if (!p)
1090 continue;
1091 p2 = strchr(p + 1, ' ');
1092 if (!p2)
1093 continue;
1094 *p2 = '\0';
1095 if (strcmp(p + 1, "/") == 0) {
1096 /* This is '/'. Is it shared? */
1097 p = strchr(p2 + 1, ' ');
1098 if (p && strstr(p, "shared:")) {
1099 fclose(f);
1100 return 1;
1101 }
1102 }
1103 }
1104 fclose(f);
1105 return 0;
1106 }
1107
/*
 * Move the calling process into namespace ns (the name of an entry below
 * /proc/<pid>/ns/) of process pid.
 *
 * Returns true on success, false if the path does not fit, the namespace
 * file cannot be opened or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns) {
	char nspath[MAXPATHLEN];
	int nsfd, len, rc;

	/* Switch to new ns */
	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	rc = setns(nsfd, 0);
	if (rc) {
		/* log before close() gets a chance to touch errno */
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, nsfd);
	}

	close(nsfd);
	return rc == 0;
}
1132
/*
 * looking at fs/proc_namespace.c, it appears we can
 * actually expect the rootfs entry to very specifically contain
 * " - rootfs rootfs "
 * IIUC, so long as we've chrooted so that rootfs is not our root,
 * the rootfs entry should always be skipped in mountinfo contents.
 *
 * Returns true if /proc/self/mountinfo has a line whose fifth field (the
 * mount point) is "/" and whose first '-' after that field starts the text
 * "- rootfs rootfs "; false otherwise or if mountinfo cannot be opened.
 */
bool detect_ramfs_rootfs(void)
{
	FILE *mounts;
	char *line = NULL;
	size_t cap = 0;
	bool is_ramfs = false;

	mounts = fopen("/proc/self/mountinfo", "r");
	if (mounts == NULL)
		return false;

	while (getline(&line, &cap, mounts) != -1) {
		char *field = line;
		char *field_end, *dash;
		int skipped;

		/* advance to the blank in front of the fifth field */
		for (skipped = 0; field && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (field == NULL)
			continue;

		field_end = strchr(field + 1, ' ');
		if (field_end == NULL)
			continue;
		*field_end = '\0';

		if (strcmp(field + 1, "/") != 0)
			continue;

		/* This is '/'. Is it the ramfs? */
		dash = strchr(field_end + 1, '-');
		if (dash != NULL && strncmp(dash, "- rootfs rootfs ", 16) == 0) {
			is_ramfs = true;
			break;
		}
	}

	free(line);
	fclose(mounts);
	return is_ramfs;
}
1175
/*
 * Look for an executable named cmd in the directories listed in $PATH. With
 * rootfs set, every directory is looked up below rootfs instead.
 *
 * Returns a newly allocated copy of the first path for which
 * access(X_OK) succeeds (including the rootfs prefix, if any), or NULL if
 * $PATH is unset, nothing matches or memory runs out. Candidates that do
 * not fit into MAXPATHLEN are skipped.
 */
char *on_path(const char *cmd, const char *rootfs) {
	char candidate[MAXPATHLEN];
	char *dirs, *dir;
	char *state = NULL;
	char *found = NULL;
	const char *env;
	int len;

	env = getenv("PATH");
	if (env == NULL)
		return NULL;

	/* strtok_r() modifies its input, so tokenize a private copy */
	dirs = strdup(env);
	if (dirs == NULL)
		return NULL;

	for (dir = strtok_r(dirs, ":", &state); dir != NULL;
	     dir = strtok_r(NULL, ":", &state)) {
		if (rootfs)
			len = snprintf(candidate, MAXPATHLEN, "%s/%s/%s", rootfs, dir, cmd);
		else
			len = snprintf(candidate, MAXPATHLEN, "%s/%s", dir, cmd);

		if (len < 0 || len >= MAXPATHLEN)
			continue;

		if (access(candidate, X_OK) == 0) {
			found = strdup(candidate);
			break;
		}
	}

	free(dirs);
	return found;
}
1213
/*
 * Return true if stat() succeeds on f, i.e. the path exists (whatever its
 * type) and is reachable; symlinks are followed.
 */
bool file_exists(const char *f)
{
	struct stat sb;

	if (stat(f, &sb) != 0)
		return false;

	return true;
}
1220
/*
 * Report whether cgroup namespaces are available, judged by the presence of
 * the cgroup entry in this process' /proc ns directory.
 */
bool cgns_supported(void)
{
	static const char cgroup_ns_path[] = "/proc/self/ns/cgroup";

	return file_exists(cgroup_ns_path);
}
1225
1226 /* historically lxc-init has been under /usr/lib/lxc and under
1227 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1228 */
1229 char *choose_init(const char *rootfs)
1230 {
1231 char *retv = NULL;
1232 const char *empty = "",
1233 *tmp;
1234 int ret, env_set = 0;
1235
1236 if (!getenv("PATH")) {
1237 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1238 SYSERROR("Failed to setenv");
1239 env_set = 1;
1240 }
1241
1242 retv = on_path("init.lxc", rootfs);
1243
1244 if (env_set) {
1245 if (unsetenv("PATH"))
1246 SYSERROR("Failed to unsetenv");
1247 }
1248
1249 if (retv)
1250 return retv;
1251
1252 retv = malloc(PATH_MAX);
1253 if (!retv)
1254 return NULL;
1255
1256 if (rootfs)
1257 tmp = rootfs;
1258 else
1259 tmp = empty;
1260
1261 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
1262 if (ret < 0 || ret >= PATH_MAX) {
1263 ERROR("pathname too long");
1264 goto out1;
1265 }
1266 if (access(retv, X_OK) == 0)
1267 return retv;
1268
1269 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
1270 if (ret < 0 || ret >= PATH_MAX) {
1271 ERROR("pathname too long");
1272 goto out1;
1273 }
1274 if (access(retv, X_OK) == 0)
1275 return retv;
1276
1277 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
1278 if (ret < 0 || ret >= PATH_MAX) {
1279 ERROR("pathname too long");
1280 goto out1;
1281 }
1282 if (access(retv, X_OK) == 0)
1283 return retv;
1284
1285 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
1286 if (ret < 0 || ret >= PATH_MAX) {
1287 ERROR("pathname too long");
1288 goto out1;
1289 }
1290 if (access(retv, X_OK) == 0)
1291 return retv;
1292
1293 /*
1294 * Last resort, look for the statically compiled init.lxc which we
1295 * hopefully bind-mounted in.
1296 * If we are called during container setup, and we get to this point,
1297 * then the init.lxc.static from the host will need to be bind-mounted
1298 * in. So we return NULL here to indicate that.
1299 */
1300 if (rootfs)
1301 goto out1;
1302
1303 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
1304 if (ret < 0 || ret >= PATH_MAX) {
1305 WARN("Nonsense - name /lxc.init.static too long");
1306 goto out1;
1307 }
1308 if (access(retv, X_OK) == 0)
1309 return retv;
1310
1311 out1:
1312 free(retv);
1313 return NULL;
1314 }
1315
/*
 * Write the string content to file, creating or truncating it.
 *
 * Returns 0 if everything was written, -1 if the file could not be opened
 * or the data could not be written completely. The data goes through a
 * stdio buffer, so a write error (e.g. a full disk) may only surface when
 * the stream is flushed by fclose(); its result is therefore checked too.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int ret = 0;
	size_t len = strlen(content);

	f = fopen(file, "w");
	if (!f)
		return -1;

	if (fwrite(content, 1, len, f) != len)
		ret = -1;

	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
1329
/*
 * Return 1 if path exists and is a directory (symlinks are followed),
 * 0 otherwise, including when stat() fails.
 */
int is_dir(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) ? 1 : 0;
}
1338
1339 /*
1340 * Given the '-t' template option to lxc-create, figure out what to
1341 * do. If the template is a full executable path, use that. If it
1342 * is something like 'sshd', then return $templatepath/lxc-sshd.
1343 * On success return the template, on error return NULL.
1344 */
1345 char *get_template_path(const char *t)
1346 {
1347 int ret, len;
1348 char *tpath;
1349
1350 if (t[0] == '/' && access(t, X_OK) == 0) {
1351 tpath = strdup(t);
1352 return tpath;
1353 }
1354
1355 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
1356 tpath = malloc(len);
1357 if (!tpath)
1358 return NULL;
1359 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
1360 if (ret < 0 || ret >= len) {
1361 free(tpath);
1362 return NULL;
1363 }
1364 if (access(tpath, X_OK) < 0) {
1365 SYSERROR("bad template: %s", t);
1366 free(tpath);
1367 return NULL;
1368 }
1369
1370 return tpath;
1371 }
1372
/*
 * Sets the process title to the specified title. Note that this may fail if
 * the kernel doesn't support PR_SET_MM_MAP (kernels <3.18).
 *
 * The memory layout values required by PR_SET_MM_MAP are parsed from
 * /proc/self/stat; only the argument area is changed, to point at a heap
 * buffer owned by this function that receives a copy of @title.
 *
 * Returns 0 on success and non-zero on failure.
 */
int setproctitle(char *title)
{
	static char *proctitle = NULL;
	char buf[2048], *tmp, *new_proctitle;
	FILE *f;
	int i, len, ret = 0;

	/* We don't really need to know all of this stuff, but unfortunately
	 * PR_SET_MM_MAP requires us to set it all at once, so we have to
	 * figure it out anyway.
	 */
	unsigned long start_data, end_data, start_brk, start_code, end_code,
			start_stack, arg_start, arg_end, env_start, env_end,
			brk_val;
	struct prctl_mm_map prctl_map;

	f = fopen_cloexec("/proc/self/stat", "r");
	if (!f) {
		return -1;
	}

	tmp = fgets(buf, sizeof(buf), f);
	fclose(f);
	if (!tmp) {
		return -1;
	}

	/* Skip the first 25 fields, column 26-28 are start_code, end_code,
	 * and start_stack */
	tmp = strchr(buf, ' ');
	for (i = 0; i < 24; i++) {
		if (!tmp)
			return -1;
		tmp = strchr(tmp+1, ' ');
	}
	if (!tmp)
		return -1;

	i = sscanf(tmp, "%lu %lu %lu", &start_code, &end_code, &start_stack);
	if (i != 3)
		return -1;

	/* Skip the next 19 fields, column 45-51 are start_data to arg_end */
	for (i = 0; i < 19; i++) {
		if (!tmp)
			return -1;
		tmp = strchr(tmp+1, ' ');
	}

	if (!tmp)
		return -1;

	/* The two skipped (%*u) fields are the current arg_start/arg_end,
	 * which are replaced below anyway.
	 */
	i = sscanf(tmp, "%lu %lu %lu %*u %*u %lu %lu",
		&start_data,
		&end_data,
		&start_brk,
		&env_start,
		&env_end);
	if (i != 5)
		return -1;

	/* Include the null byte here, because in the calculations below we
	 * want to have room for it. */
	len = strlen(title) + 1;

	/* Keep the previous buffer if realloc() fails: it may still be
	 * installed as the argument area by an earlier call, so it must not be
	 * lost.
	 */
	new_proctitle = realloc(proctitle, len);
	if (!new_proctitle)
		return -1;
	proctitle = new_proctitle;

	arg_start = (unsigned long) proctitle;
	arg_end = arg_start + len;

	brk_val = syscall(__NR_brk, 0);

	prctl_map = (struct prctl_mm_map) {
		.start_code = start_code,
		.end_code = end_code,
		.start_stack = start_stack,
		.start_data = start_data,
		.end_data = end_data,
		.start_brk = start_brk,
		.brk = brk_val,
		.arg_start = arg_start,
		.arg_end = arg_end,
		.env_start = env_start,
		.env_end = env_end,
		.auxv = NULL,
		.auxv_size = 0,
		.exe_fd = -1,
	};

	ret = prctl(PR_SET_MM, PR_SET_MM_MAP, (long) &prctl_map, sizeof(prctl_map), 0);
	if (ret == 0)
		strcpy((char*)arg_start, title);
	else
		INFO("setting cmdline failed - %s", strerror(errno));

	return ret;
}
1476
/*
 * Step to the next segment of a tokenized path.
 *
 * @path:    a pathname in which every '/' has been replaced with '\0'.
 * @offsetp: pointer to int showing which path segment was last seen.
 *           Updated on return to reflect the next segment (left untouched
 *           if the offset was already at or past @fulllen).
 * @fulllen: full original path length.
 *
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int offset = *offsetp;

	if (offset >= fulllen)
		return NULL;

	/* Check the bound before dereferencing, so nothing at or beyond
	 * @fulllen is ever read.
	 */

	/* Skip what is left of the current segment ... */
	while (offset < fulllen && path[offset] != '\0')
		offset++;
	/* ... and the separator(s) that follow it. */
	while (offset < fulllen && path[offset] == '\0')
		offset++;

	*offsetp = offset;
	return (offset < fulllen) ? &path[offset] : NULL;
}
1499
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * @subdir matches when it starts with @dir and the match ends on a path
 * component boundary: @dir ends in '/', @subdir continues with '/', or both
 * strings are equal.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	if (subdirlen < len)
		return false;
	if (strncmp(subdir, dir, len) != 0)
		return false;
	/* Guard against len == 0, where dir[len - 1] would be out of bounds. */
	if (len > 0 && dir[len - 1] == '/')
		return true;
	if (subdir[len] == '/' || subdirlen == len)
		return true;
	return false;
}
1518
/*
 * Inspect an already opened fd and reject it if it refers to a symlink.
 *
 * Returns 0 if the fd is fine, -ELOOP if it is a symbolic link and -ENOENT
 * if fstat() failed.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
1533
/*
 * Open @nextpath relative to @dirfd, refusing to follow a symlink.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns the new fd on success. On failure a negative value is returned:
 * the result of the failed openat(), or the negative error code from
 * check_symlink() when the O_PATH fallback produced an unacceptable fd.
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int fd, ret;

	fd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);

	/* Either it worked, or it failed for a reason (ELOOP included) that
	 * retrying with O_PATH is not meant to address.
	 */
	if (fd >= 0 || (errno != EPERM && errno != EACCES))
		return fd;

	/* We lack permission for a regular open, so try again with O_PATH. */
	fd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (fd < 0)
		return fd;

	/* O_PATH does hand out fds for symlinks. The first openat() did not
	 * report a symlink, so finding one now means something fishy is going
	 * on.
	 */
	ret = check_symlink(fd);
	if (ret >= 0)
		return fd;

	close(fd);
	return ret;
}
1569
/*
 * Open a path intended as a mount target, walking it one component at a
 * time so that no component below the prefix is a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target:      path to be opened
 * @prefix_skip: a part of @target in which to ignore symbolic links. This
 *               would be the container's rootfs. NULL or "" means "/".
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	char *segments, *p;
	int dirfd;
	int offset = 0;
	int target_len = strlen(target);

	/* make sure prefix-skip makes sense */
	if (prefix_skip && prefix_skip[0] != '\0') {
		offset = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, offset)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
			      target, prefix_skip);
			return -EINVAL;
		}
		/*
		 * get_nextpath() wants the offset to sit on a '/' (by then
		 * turned into '\0') or before it, so step back by one. The
		 * prefix is known to be non-empty here.
		 */
		offset--;
	} else {
		prefix_skip = "/";
	}

	/* Work on a private copy of target that can be cut into segments. */
	segments = strdup(target);
	if (!segments) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}
	for (p = segments; p < segments + target_len; p++) {
		if (*p == '/')
			*p = '\0';
	}

	/* Start at the prefix and descend one component per iteration; the
	 * loop ends when the path is used up or an open fails.
	 */
	dirfd = open(prefix_skip, O_RDONLY);
	while (dirfd >= 0) {
		int nextfd, saved_errno;
		char *component;

		component = get_nextpath(segments, &offset, target_len);
		if (!component)
			break;

		nextfd = open_if_safe(dirfd, component);
		saved_errno = errno;
		close(dirfd);
		dirfd = nextfd;
		if (nextfd < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);
		}
	}

	free(segments);
	return dirfd;
}
1645
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * The target (and, for relative bind mounts, the source) is opened through
 * open_without_symlink() and the mount is performed on the matching
 * /proc/self/fd/<fd> paths.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int destfd, ret, saved_errno;
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	int srcfd = -1;
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if (flags & MS_BIND && src && src[0] != '/') {
		INFO("this is a relative bind mount");
		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		/* ret == size already means the output was truncated. */
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			/* Do not let close() disturb the errno of the failure. */
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}
		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);
		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);
	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		SYSERROR("Failed to mount %s onto %s", src, dest);
		return ret;
	}

	return 0;
}
1713
1714 /*
1715 * Mount a proc under @rootfs if proc self points to a pid other than
1716 * my own. This is needed to have a known-good proc mount for setting
1717 * up LSMs both at container startup and attach.
1718 *
1719 * @rootfs : the rootfs where proc should be mounted
1720 *
1721 * Returns < 0 on failure, 0 if the correct proc was already mounted
1722 * and 1 if a new proc was mounted.
1723 *
1724 * NOTE: not to be called from inside the container namespace!
1725 */
1726 int lxc_mount_proc_if_needed(const char *rootfs)
1727 {
1728 char path[MAXPATHLEN];
1729 int link_to_pid, linklen, mypid, ret;
1730 char link[LXC_NUMSTRLEN64] = {0};
1731
1732 ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
1733 if (ret < 0 || ret >= MAXPATHLEN) {
1734 SYSERROR("proc path name too long");
1735 return -1;
1736 }
1737
1738 linklen = readlink(path, link, LXC_NUMSTRLEN64);
1739
1740 ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
1741 if (ret < 0 || ret >= MAXPATHLEN) {
1742 SYSERROR("proc path name too long");
1743 return -1;
1744 }
1745
1746 /* /proc not mounted */
1747 if (linklen < 0) {
1748 if (mkdir(path, 0755) && errno != EEXIST)
1749 return -1;
1750 goto domount;
1751 } else if (linklen >= LXC_NUMSTRLEN64) {
1752 link[linklen - 1] = '\0';
1753 ERROR("readlink returned truncated content: \"%s\"", link);
1754 return -1;
1755 }
1756
1757 mypid = getpid();
1758 INFO("I am %d, /proc/self points to \"%s\"", mypid, link);
1759
1760 if (lxc_safe_int(link, &link_to_pid) < 0)
1761 return -1;
1762
1763 /* correct procfs is already mounted */
1764 if (link_to_pid == mypid)
1765 return 0;
1766
1767 ret = umount2(path, MNT_DETACH);
1768 if (ret < 0)
1769 WARN("failed to umount \"%s\" with MNT_DETACH", path);
1770
1771 domount:
1772 /* rootfs is NULL */
1773 if (!strcmp(rootfs, ""))
1774 ret = mount("proc", path, "proc", 0, NULL);
1775 else
1776 ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);
1777 if (ret < 0)
1778 return -1;
1779
1780 INFO("mounted /proc in container for security transition");
1781 return 1;
1782 }
1783
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the fd, or the negative result of open() after logging the error.
 */
int open_devnull(void)
{
	int fd;

	fd = open("/dev/null", O_RDWR);
	if (fd >= 0)
		return fd;

	SYSERROR("Can't open /dev/null");
	return fd;
}
1793
/*
 * Make stdin, stdout and stderr (in that order) duplicates of @fd.
 *
 * Returns 0 on success, -1 if @fd is negative or any dup2() fails. @fd itself
 * is left open.
 */
int set_stdfds(int fd)
{
	static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t i;

	if (fd < 0)
		return -1;

	for (i = 0; i < sizeof(std_fds) / sizeof(std_fds[0]); i++) {
		if (dup2(fd, std_fds[i]) < 0)
			return -1;
	}

	return 0;
}
1815
/*
 * Point stdin, stdout and stderr at /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null cannot be opened.
 */
int null_stdfds(void)
{
	int fd, ret;

	fd = open_devnull();
	if (fd < 0)
		return -1;

	ret = set_stdfds(fd);
	close(fd);

	return ret;
}
1828
/*
 * Count the lines of file @fn by reading it with getline() until it fails.
 *
 * Returns the number of lines, or -1 if the file cannot be opened.
 */
int lxc_count_file_lines(const char *fn)
{
	char *line = NULL;
	size_t bufsz = 0;
	int lines;
	FILE *f;

	f = fopen_cloexec(fn, "r");
	if (!f)
		return -1;

	for (lines = 0; getline(&line, &bufsz, f) != -1; lines++)
		;

	free(line);
	fclose(f);

	return lines;
}
1850
/*
 * Map @length bytes of @fd so that the mapping is followed by one extra
 * zero byte, which allows the content to be handled with the usual string
 * functions.
 *
 * @addr, @prot, @flags, @fd and @offset are passed on to mmap().
 *
 * Returns the address of the mapping or MAP_FAILED.
 */
void *lxc_strmmap(void *addr, size_t length, int prot, int flags, int fd,
		  off_t offset)
{
	void *anon, *file_map;

	/* Reserve length + 1 bytes of anonymous memory first. Anonymous pages
	 * are handed out zero filled.
	 */
	anon = mmap(addr, length + 1, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (anon == MAP_FAILED)
		return MAP_FAILED;

	/* Then place the file on top of the first @length bytes of that
	 * reservation, leaving the additional byte untouched.
	 */
	file_map = mmap(anon, length, prot, MAP_FIXED | flags, fd, offset);
	if (file_map != MAP_FAILED)
		return file_map;

	munmap(anon, length + 1);
	return MAP_FAILED;
}
1872
/*
 * Unmap @length + 1 bytes starting at @addr, i.e. a mapping of @length bytes
 * plus one additional trailing byte.
 *
 * Returns the result of munmap().
 */
int lxc_strmunmap(void *addr, size_t length)
{
	const size_t mapped_len = length + 1;

	return munmap(addr, mapped_len);
}
1877
1878 /* Check whether a signal is blocked by a process. */
1879 /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
1880 #define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
1881 bool task_blocking_signal(pid_t pid, int signal)
1882 {
1883 bool bret = false;
1884 char *line = NULL;
1885 long unsigned int sigblk = 0;
1886 size_t n = 0;
1887 int ret;
1888 FILE *f;
1889
1890 char status[__PROC_STATUS_LEN];
1891
1892 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1893 if (ret < 0 || ret >= __PROC_STATUS_LEN)
1894 return bret;
1895
1896 f = fopen(status, "r");
1897 if (!f)
1898 return bret;
1899
1900 while (getline(&line, &n, f) != -1) {
1901 if (strncmp(line, "SigBlk:\t", 8))
1902 continue;
1903
1904 if (sscanf(line + 8, "%lx", &sigblk) != 1)
1905 goto out;
1906 }
1907
1908 if (sigblk & (1LU << (signal - 1)))
1909 bret = true;
1910
1911 out:
1912 free(line);
1913 fclose(f);
1914 return bret;
1915 }
1916
/*
 * Grow the NULL-terminated pointer array *@list by one slot.
 *
 * *@list may be NULL, in which case a new array is allocated. Both the new
 * slot and the terminator behind it are set to NULL, so the array stays a
 * valid NULL-terminated list even if the caller never fills the slot.
 *
 * Returns the index of the new slot, or -1 if the allocation failed (in
 * which case *@list is left as it was).
 */
static int lxc_append_null_to_list(void ***list)
{
	int newentry = 0;
	void **tmp;

	/* Find the current terminator. */
	if (*list)
		for (; (*list)[newentry]; newentry++) {
			;
		}

	/* Room for the existing entries, the new slot and the terminator. */
	tmp = realloc(*list, (newentry + 2) * sizeof(*tmp));
	if (!tmp)
		return -1;

	*list = tmp;
	(*list)[newentry] = NULL;
	(*list)[newentry + 1] = NULL;

	return newentry;
}
1936
/*
 * Append a copy of @entry to the NULL-terminated string array *@list.
 *
 * The array is grown first and the string is duplicated afterwards.
 *
 * Returns 0 on success and -1 if growing the array or copying the string
 * failed.
 */
int lxc_append_string(char ***list, char *entry)
{
	int slot;
	char *dup;

	slot = lxc_append_null_to_list((void ***)list);
	if (slot < 0)
		return -1;

	dup = strdup(entry);
	if (!dup)
		return -1;

	(*list)[slot] = dup;
	return 0;
}
1954
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * With @ns NULL or empty, /proc/<pid>/ns itself is opened; that way this
 * function can also be used to check whether namespaces are supported by the
 * kernel.
 *
 * Returns the fd from open(), or -1 if the path does not fit the buffer.
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
	int ret;
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char path[__NS_PATH_LEN];
	const char *sep = "/";

	/* No namespace name means no separator either. */
	if (!ns || ns[0] == '\0') {
		sep = "";
		ns = "";
	}

	ret = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid, sep, ns);
	if (ret < 0 || (size_t)ret >= __NS_PATH_LEN)
		return -1;

	return open(path, O_RDONLY | O_CLOEXEC);
}
1974
/*
 * Convert @numstr to an unsigned int.
 *
 * Leading whitespace is skipped and the base is detected by strtoul()
 * (base 0). A leading '-' is rejected instead of being wrapped around.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is negative, empty or has trailing characters, and -ERANGE if the
 * value does not fit into an unsigned int.
 */
int lxc_safe_uint(const char *numstr, unsigned int *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* The <ctype.h> functions need a value representable as unsigned
	 * char; a plain char may be negative.
	 */
	while (isspace((unsigned char)*numstr))
		numstr++;

	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	if (uli > UINT_MAX)
		return -ERANGE;

	*converted = (unsigned int)uli;
	return 0;
}
2000
/*
 * Convert @numstr to an unsigned long.
 *
 * Leading whitespace is skipped and the base is detected by strtoul()
 * (base 0). A leading '-' is rejected instead of being wrapped around.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is negative, empty or has trailing characters, and -ERANGE if the
 * value overflows an unsigned long.
 */
int lxc_safe_ulong(const char *numstr, unsigned long *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* The <ctype.h> functions need a value representable as unsigned
	 * char; a plain char may be negative.
	 */
	while (isspace((unsigned char)*numstr))
		numstr++;

	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	*converted = uli;
	return 0;
}
2023
/*
 * Convert @numstr to an int using strtol() with automatic base detection.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not a complete number, and -ERANGE if the value does not fit
 * into an int.
 */
int lxc_safe_int(const char *numstr, int *converted)
{
	char *end = NULL;
	long val;

	errno = 0;
	val = strtol(numstr, &end, 0);

	if (errno == ERANGE) {
		/* Overflow or underflow of long itself. */
		if (val == LONG_MAX || val == LONG_MIN)
			return -ERANGE;
	}

	if (errno != 0 && val == 0)
		return -EINVAL;

	/* Nothing was parsed, or something is left over. */
	if (end == numstr || *end != '\0')
		return -EINVAL;

	if (val < INT_MIN || val > INT_MAX)
		return -ERANGE;

	*converted = (int)val;
	return 0;
}
2046
/*
 * Convert @numstr to a long using strtol() with automatic base detection.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not a complete number, and -ERANGE if the value overflows a long.
 */
int lxc_safe_long(const char *numstr, long int *converted)
{
	char *end = NULL;
	long val;

	errno = 0;
	val = strtol(numstr, &end, 0);

	if (errno == ERANGE) {
		/* Overflow or underflow of long. */
		if (val == LONG_MAX || val == LONG_MIN)
			return -ERANGE;
	}

	if (errno != 0 && val == 0)
		return -EINVAL;

	/* Nothing was parsed, or something is left over. */
	if (end == numstr || *end != '\0')
		return -EINVAL;

	*converted = val;
	return 0;
}
2066
/*
 * Switch to @gid first and then to @uid, logging each step.
 *
 * Returns 0 on success, or the negated errno of the call that failed.
 */
int lxc_switch_uid_gid(uid_t uid, gid_t gid)
{
	int saved_errno;

	if (setgid(gid) < 0) {
		/* Remember errno before logging, which might change it. */
		saved_errno = errno;
		SYSERROR("Failed to switch to gid %d.", gid);
		return -saved_errno;
	}
	NOTICE("Switched to gid %d.", gid);

	if (setuid(uid) < 0) {
		saved_errno = errno;
		SYSERROR("Failed to switch to uid %d.", uid);
		return -saved_errno;
	}
	NOTICE("Switched to uid %d.", uid);

	return 0;
}
2083
/*
 * Simple convenience function which enables uniform logging.
 *
 * Passes @size and @list to setgroups().
 *
 * Returns 0 on success, or the negated errno of setgroups() on failure.
 */
int lxc_setgroups(int size, gid_t list[])
{
	if (setgroups(size, list) < 0) {
		/* Remember errno before logging, which might change it. */
		int saved_errno = errno;

		SYSERROR("Failed to setgroups().");
		return -saved_errno;
	}
	NOTICE("Dropped additional groups.");

	return 0;
}
2095
/*
 * Find an unused loop device by scanning /dev for entries starting with
 * "loop", for systems without /dev/loop-control.
 *
 * @loop_name: buffer of at least LO_NAME_SIZE bytes that receives the path
 *             of the device that was picked.
 *
 * Returns an open fd for the device, or -1 if none was found.
 */
static int lxc_get_unused_loop_dev_legacy(char *loop_name)
{
	struct dirent *dp;
	struct loop_info64 lo64;
	DIR *dir;
	int dfd, fd = -1, ret;

	dir = opendir("/dev");
	if (!dir)
		return -1;

	dfd = dirfd(dir);
	if (dfd < 0) {
		closedir(dir);
		return -1;
	}

	while ((dp = readdir(dir))) {
		if (strncmp(dp->d_name, "loop", 4) != 0)
			continue;

		fd = openat(dfd, dp->d_name, O_RDWR);
		if (fd < 0)
			continue;

		/* A device is free only if the status query fails with ENXIO.
		 * Success means it is already bound to a backing file, and any
		 * other error means it cannot be used (or is not a loop device
		 * at all).
		 */
		ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
		if (ret == 0 || errno != ENXIO) {
			close(fd);
			fd = -1;
			continue;
		}

		ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
		if (ret < 0 || ret >= LO_NAME_SIZE) {
			close(fd);
			fd = -1;
			continue;
		}

		break;
	}

	closedir(dir);

	if (fd < 0)
		return -1;

	return fd;
}
2149
/*
 * Ask /dev/loop-control for a free loop device and open it.
 *
 * @name_loop: buffer of at least LO_NAME_SIZE bytes that receives the path
 *             "/dev/loop<N>" of the device.
 *
 * Returns an open fd for the device, -ENODEV if /dev/loop-control cannot be
 * opened, and -1 for any later failure.
 */
static int lxc_get_unused_loop_dev(char *name_loop)
{
	int ctl_fd, free_nr, len;
	int loop_fd = -1;

	ctl_fd = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
	if (ctl_fd < 0)
		return -ENODEV;

	free_nr = ioctl(ctl_fd, LOOP_CTL_GET_FREE);
	if (free_nr >= 0) {
		len = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", free_nr);
		if (len >= 0 && len < LO_NAME_SIZE)
			loop_fd = open(name_loop, O_RDWR | O_CLOEXEC);
	}

	close(ctl_fd);
	return loop_fd;
}
2175
/*
 * Attach the image @source to an unused loop device.
 *
 * @source:   path of the backing file
 * @loop_dev: buffer that receives the path of the loop device
 * @flags:    stored in lo_flags of the status set on the device
 *
 * The device is looked up via /dev/loop-control; only if that reports
 * -ENODEV is the legacy scan of /dev used instead.
 *
 * Returns an open fd for the configured loop device, or a negative value on
 * failure.
 *
 * NOTE(review): if LOOP_SET_STATUS64 fails after LOOP_SET_FD succeeded, the
 * device fd is just closed; whether the binding should be cleared explicitly
 * is worth confirming.
 */
int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
{
	struct loop_info64 lo64;
	int img_fd = -1, loop_fd;
	bool ready = false;

	loop_fd = lxc_get_unused_loop_dev(loop_dev);
	if (loop_fd == -ENODEV)
		loop_fd = lxc_get_unused_loop_dev_legacy(loop_dev);
	else if (loop_fd < 0)
		goto out;

	img_fd = open(source, O_RDWR | O_CLOEXEC);
	if (img_fd < 0)
		goto out;

	if (ioctl(loop_fd, LOOP_SET_FD, img_fd) < 0)
		goto out;

	memset(&lo64, 0, sizeof(lo64));
	lo64.lo_flags = flags;

	if (ioctl(loop_fd, LOOP_SET_STATUS64, &lo64) < 0)
		goto out;

	ready = true;

out:
	/* The image fd is closed on every path once it has been opened. */
	if (img_fd >= 0)
		close(img_fd);

	if (!ready && loop_fd >= 0) {
		close(loop_fd);
		loop_fd = -1;
	}

	return loop_fd;
}
2218
/*
 * Unmount everything that is stacked on @path by calling umount2() until it
 * fails.
 *
 * @lazy: use MNT_DETACH for the unmounts
 *
 * Returns the number of successful unmounts (capped at INT_MAX) once
 * umount2() fails with EINVAL, or the negated errno for any other failure.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	const int flags = lazy ? MNT_DETACH : 0;
	int umounts = 0;

	/* Every success may have uncovered another mount underneath, so keep
	 * going until umount2() fails.
	 */
	while (umount2(path, flags) >= 0) {
		/* Just stop counting at INT_MAX. That many stacked mounts
		 * would be so silly that reporting the correct value is not
		 * worth the trouble.
		 */
		if (umounts != INT_MAX)
			umounts++;
	}

	/* Anything other than EINVAL is treated as fatal; this is what
	 * prevents an infinite loop. (The alternative would be constantly
	 * parsing /proc/self/mountinfo, which is yucky and probably racy.)
	 */
	if (errno != EINVAL)
		return -errno;

	return umounts;
}
2249
/*
 * Run @child_fn(@args) in a forked child whose stdout and stderr are
 * connected to a pipe, and capture its output.
 *
 * @buf:      optional buffer for the output; a single read() of at most
 *            @buf_size - 1 bytes is stored, and the last byte that was read
 *            is replaced with the terminating '\0'
 * @buf_size: size of @buf
 * @child_fn: executed in the child; expected not to return
 * @args:     argument passed to @child_fn
 *
 * Returns -1 if the pipe or the child cannot be created, otherwise the
 * result of wait_for_pid() for the child.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	enum { PIPE_RD = 0, PIPE_WR = 1 };
	int pipefd[2];
	int status;
	pid_t pid;
	bool capture = buf && buf_size > 0;

	/* Never hand uninitialized memory back to the caller. */
	if (capture)
		buf[0] = '\0';

	if (pipe(pipefd) < 0) {
		SYSERROR("failed to create pipe");
		return -1;
	}

	pid = fork();
	if (pid < 0) {
		close(pipefd[PIPE_RD]);
		close(pipefd[PIPE_WR]);
		SYSERROR("failed to create new process");
		return -1;
	}

	if (pid == 0) {
		int dup_ret;

		/* The child only writes. */
		close(pipefd[PIPE_RD]);

		/* Send stdout and stderr into the pipe. */
		dup_ret = dup2(pipefd[PIPE_WR], STDOUT_FILENO);
		if (dup_ret >= 0)
			dup_ret = dup2(pipefd[PIPE_WR], STDERR_FILENO);

		/* The original descriptor is not needed any longer. */
		close(pipefd[PIPE_WR]);

		if (dup_ret < 0) {
			SYSERROR("failed to duplicate std{err,out} file descriptor");
			exit(EXIT_FAILURE);
		}

		/* Only reached again if the callback came back. */
		child_fn(args);
		ERROR("failed to exec command");
		exit(EXIT_FAILURE);
	}

	/* The parent only reads. */
	close(pipefd[PIPE_WR]);

	if (capture) {
		ssize_t nread = read(pipefd[PIPE_RD], buf, buf_size - 1);

		if (nread > 0)
			buf[nread - 1] = '\0';
	}

	status = wait_for_pid(pid);
	close(pipefd[PIPE_RD]);

	return status;
}
2313
/*
 * Join @first and the following NULL-terminated list of strings into one
 * newly allocated path.
 *
 * A '/' is inserted in front of every further component that does not start
 * with one itself. The allocations are done with the must_*() helpers.
 *
 * Returns the path; the caller owns it.
 */
char *must_make_path(const char *first, ...)
{
	va_list ap;
	char *part, *path;
	size_t path_len = strlen(first);

	path = must_copy_string(first);

	va_start(ap, first);
	for (part = va_arg(ap, char *); part != NULL; part = va_arg(ap, char *)) {
		bool need_sep = (part[0] != '/');

		path_len += strlen(part);
		if (need_sep)
			path_len++;

		/* + 1 for the terminating '\0' */
		path = must_realloc(path, path_len + 1);

		if (need_sep)
			strcat(path, "/");
		strcat(path, part);
	}
	va_end(ap);

	return path;
}
2336
/*
 * Duplicate @entry, retrying strdup() until it succeeds.
 *
 * Returns the copy (owned by the caller), or NULL if @entry is NULL.
 */
char *must_copy_string(const char *entry)
{
	char *copy;

	if (!entry)
		return NULL;

	for (;;) {
		copy = strdup(entry);
		if (copy)
			return copy;
	}
}
2349
/*
 * realloc() @orig to @sz bytes, retrying until the allocation succeeds.
 *
 * A request for 0 bytes is turned into a request for 1 byte: realloc() is
 * allowed to answer a zero-sized request with NULL (possibly after freeing
 * @orig), which would otherwise be mistaken for a failure and retried forever
 * on a pointer that may no longer be valid.
 *
 * Returns the (never NULL) new pointer.
 */
void *must_realloc(void *orig, size_t sz)
{
	void *ret;

	if (sz == 0)
		sz = 1;

	do {
		ret = realloc(orig, sz);
	} while (!ret);

	return ret;
}
2360
2361 bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val)
2362 {
2363 return (fs->f_type == (fs_type_magic)magic_val);
2364 }
2365
2366 bool has_fs_type(const char *path, fs_type_magic magic_val)
2367 {
2368 bool has_type;
2369 int ret;
2370 struct statfs sb;
2371
2372 ret = statfs(path, &sb);
2373 if (ret < 0)
2374 return false;
2375
2376 has_type = is_fs_type(&sb, magic_val);
2377 if (!has_type && magic_val == RAMFS_MAGIC)
2378 WARN("When the ramfs it a tmpfs statfs() might report tmpfs");
2379
2380 return has_type;
2381 }
2382
/*
 * Check whether the network interface @nic exists by stat()ing
 * /sys/class/net/<nic>.
 *
 * The special name "none" is always reported as existing.
 *
 * Returns true if the interface exists (or is "none"), false if the name
 * does not fit the path buffer or stat() fails.
 */
bool lxc_nic_exists(char *nic)
{
#define __LXC_SYS_CLASS_NET_LEN 15 + IFNAMSIZ + 1
	char path[__LXC_SYS_CLASS_NET_LEN];
	struct stat sb;
	int len;

	if (strcmp(nic, "none") == 0)
		return true;

	len = snprintf(path, __LXC_SYS_CLASS_NET_LEN, "/sys/class/net/%s", nic);
	if (len < 0 || (size_t)len >= __LXC_SYS_CLASS_NET_LEN)
		return false;

	return stat(path, &sb) >= 0;
}