]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
Merge pull request #1801 from brauner/2017-09-09/userns_exec
[mirror_lxc.git] / src / lxc / utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config.h"
25
26 #define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
27 #include <ctype.h>
28 #include <dirent.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <grp.h>
32 #include <inttypes.h>
33 #include <libgen.h>
34 #include <stddef.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <sys/mman.h>
40 #include <sys/mount.h>
41 #include <sys/param.h>
42 #include <sys/prctl.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <sys/wait.h>
46
47 #include "log.h"
48 #include "lxclock.h"
49 #include "namespace.h"
50 #include "utils.h"
51
52 #ifndef PR_SET_MM
53 #define PR_SET_MM 35
54 #endif
55
#ifndef PR_SET_MM_MAP
#define PR_SET_MM_MAP 14

/*
 * Local fallback used only when the system headers do not define
 * PR_SET_MM_MAP. setproctitle() below declares a variable of this type.
 * NOTE(review): presumably mirrors the kernel's struct of the same name;
 * keep field order and widths unchanged.
 */
struct prctl_mm_map {
	/* text segment */
	uint64_t start_code;
	uint64_t end_code;

	/* data segment */
	uint64_t start_data;
	uint64_t end_data;

	/* heap */
	uint64_t start_brk;
	uint64_t brk;

	/* stack, argument and environment areas */
	uint64_t start_stack;
	uint64_t arg_start;
	uint64_t arg_end;
	uint64_t env_start;
	uint64_t env_end;

	/* auxiliary vector and executable descriptor */
	uint64_t *auxv;
	uint32_t auxv_size;
	uint32_t exe_fd;
};
#endif
76
77 #ifndef O_PATH
78 #define O_PATH 010000000
79 #endif
80
81 #ifndef O_NOFOLLOW
82 #define O_NOFOLLOW 00400000
83 #endif
84
85 lxc_log_define(lxc_utils, lxc);
86
87 /*
88 * if path is btrfs, tries to remove it and any subvolumes beneath it
89 */
90 extern bool btrfs_try_remove_subvol(const char *path);
91
/*
 * Delete the contents of @dirname and then @dirname itself.
 *
 * @dirname: directory to remove
 * @pdev:    st_dev value the caller recorded for the top-level directory
 * @exclude: optional entry name that gets special treatment at @level 0:
 *           it is only rmdir()'ed (or unlink()'ed if it is not a directory)
 *           and is kept when it is not empty
 * @level:   recursion depth, 0 for the outermost call
 * @onedev:  when true, entries whose st_dev differs from @pdev are not
 *           descended into (only a btrfs subvolume removal is attempted)
 *
 * Returns 0 when nothing failed, -1 otherwise.
 */
static int _recursive_rmdir(char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	char pathname[MAXPATHLEN];
	struct dirent *entry;
	bool kept_exclude = false;
	int failed = 0;
	DIR *dir;

	dir = opendir(dirname);
	if (!dir) {
		ERROR("failed to open %s", dirname);
		return -1;
	}

	while ((entry = readdir(dir)) != NULL) {
		const char *name = entry->d_name;
		struct stat sb;
		int len;

		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
			continue;

		len = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, name);
		if (len < 0 || len >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			continue;
		}

		/* The excluded entry is never recursed into. */
		if (level == 0 && exclude && strcmp(name, exclude) == 0) {
			if (rmdir(pathname) < 0) {
				if (errno == ENOTEMPTY) {
					INFO("Not deleting snapshot %s", pathname);
					kept_exclude = true;
				} else if (errno == ENOTDIR) {
					if (unlink(pathname))
						INFO("Failed to remove %s", pathname);
				} else {
					SYSERROR("Failed to rmdir %s", pathname);
					failed = 1;
				}
			}
			continue;
		}

		if (lstat(pathname, &sb)) {
			ERROR("Failed to stat %s", pathname);
			failed = 1;
			continue;
		}

		if (onedev && sb.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(pathname))
				INFO("Removed btrfs subvolume at %s\n", pathname);
			continue;
		}

		if (S_ISDIR(sb.st_mode)) {
			if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0)
				failed = 1;
			continue;
		}

		if (unlink(pathname) < 0) {
			SYSERROR("Failed to delete %s", pathname);
			failed = 1;
		}
	}

	/* A directory that still holds the kept exclude entry may stay. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !kept_exclude) {
		ERROR("Failed to delete %s", dirname);
		failed = 1;
	}

	if (closedir(dir)) {
		ERROR("Failed to close directory %s", dirname);
		failed = 1;
	}

	return failed ? -1 : 0;
}
184
185 /* We have two different magic values for overlayfs, yay. */
186 #ifndef OVERLAYFS_SUPER_MAGIC
187 #define OVERLAYFS_SUPER_MAGIC 0x794c764f
188 #endif
189
190 #ifndef OVERLAY_SUPER_MAGIC
191 #define OVERLAY_SUPER_MAGIC 0x794c7630
192 #endif
193
194 /* In overlayfs, st_dev is unreliable. So on overlayfs we don't do the
195 * lxc_rmdir_onedev()
196 */
197 static bool is_native_overlayfs(const char *path)
198 {
199 if (has_fs_type(path, OVERLAY_SUPER_MAGIC) ||
200 has_fs_type(path, OVERLAYFS_SUPER_MAGIC))
201 return true;
202
203 return false;
204 }
205
/*
 * Recursively remove @path, handling the top-level entry named @exclude
 * specially (see _recursive_rmdir()).
 *
 * Returns 0 on success or when @path does not exist, -1 if there were any
 * failures.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat sb;
	/* The one-device restriction is dropped on overlayfs. */
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &sb) == 0)
		return _recursive_rmdir(path, sb.st_dev, exclude, 0, onedev);

	if (errno == ENOENT)
		return 0;

	ERROR("Failed to stat %s", path);
	return -1;
}
225
/*
 * Parse @arg as an unsigned 16-bit number in the given @base and store it in
 * @val. The whole string must be consumed. Borrowed from iproute2.
 *
 * Returns 0 on success, -1 on empty/NULL input, trailing garbage, a
 * conversion error or a value above 0xFFFF. @val is untouched on failure.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	unsigned long parsed;
	char *end = NULL;

	if (arg == NULL || *arg == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);

	if (errno != 0)
		return -1;

	if (end == NULL || end == arg || *end != '\0')
		return -1;

	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;
	return 0;
}
244
/*
 * Create @dir and every missing parent directory with permissions @mode.
 *
 * Each pass creates the prefix of the path that ends right before the current
 * component, so the full path is created on the final pass. Directories that
 * already exist are not an error.
 *
 * Returns 0 on success, -1 if a mkdir() fails for a reason other than EEXIST
 * or if memory for the temporary prefix cannot be allocated.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* Skip separators, then step over the next component. */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		makeme = strndup(orig, dir - orig);
		if (!makeme) {
			/* Previously dereferenced unchecked. */
			ERROR("failed to allocate memory");
			return -1;
		}

		/* An empty prefix (relative path, first pass) needs no mkdir. */
		if (*makeme && mkdir(makeme, mode) && errno != EEXIST) {
			SYSERROR("failed to create directory '%s'", makeme);
			free(makeme);
			return -1;
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
267
268 char *get_rundir()
269 {
270 char *rundir;
271 const char *homedir;
272
273 if (geteuid() == 0) {
274 rundir = strdup(RUNTIME_PATH);
275 return rundir;
276 }
277
278 rundir = getenv("XDG_RUNTIME_DIR");
279 if (rundir) {
280 rundir = strdup(rundir);
281 return rundir;
282 }
283
284 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
285 homedir = getenv("HOME");
286 if (!homedir) {
287 ERROR("HOME isn't set in the environment.");
288 return NULL;
289 }
290
291 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
292 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
293
294 return rundir;
295 }
296
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted.
 *
 * Returns 0 only if the child exited normally with status 0; -1 if waitpid()
 * fails or the child terminated in any other way.
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		pid_t reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}

		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
314
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted.
 *
 * Returns the raw wait status as filled in by waitpid(), or -1 if waitpid()
 * fails with anything other than EINTR.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;

	for (;;) {
		pid_t reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}

		if (reaped == pid)
			return status;
	}
}
330
/*
 * write() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final write() returned.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
340
/*
 * read() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final read() returned.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t nread;

	do {
		nread = read(fd, buf, count);
	} while (nread < 0 && errno == EINTR);

	return nread;
}
350
/*
 * Read exactly @count bytes from @fd into @buf and, when @expected_buf is
 * given, require them to equal the first @count bytes of @expected_buf.
 *
 * Returns the byte count on success; the read result unchanged if it is
 * <= 0; -1 on a short read; -1 with errno = EINVAL on a content mismatch.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t nread = lxc_read_nointr(fd, buf, count);

	if (nread <= 0)
		return nread;

	if ((size_t)nread != count)
		return -1;

	if (!expected_buf || memcmp(buf, expected_buf, count) == 0)
		return nread;

	errno = EINVAL;
	return -1;
}
365
366 #if HAVE_LIBGNUTLS
367 #include <gnutls/gnutls.h>
368 #include <gnutls/crypto.h>
369
/*
 * Marked as a constructor so gnutls_global_init() is called before the
 * hashing helper below can be used. Its return value is not inspected.
 */
__attribute__((constructor))
static void gnutls_lxc_init(void)
{
	(void)gnutls_global_init();
}
375
376 int sha1sum_file(char *fnam, unsigned char *digest)
377 {
378 char *buf;
379 int ret;
380 FILE *f;
381 long flen;
382
383 if (!fnam)
384 return -1;
385 f = fopen_cloexec(fnam, "r");
386 if (!f) {
387 SYSERROR("Error opening template");
388 return -1;
389 }
390 if (fseek(f, 0, SEEK_END) < 0) {
391 SYSERROR("Error seeking to end of template");
392 fclose(f);
393 return -1;
394 }
395 if ((flen = ftell(f)) < 0) {
396 SYSERROR("Error telling size of template");
397 fclose(f);
398 return -1;
399 }
400 if (fseek(f, 0, SEEK_SET) < 0) {
401 SYSERROR("Error seeking to start of template");
402 fclose(f);
403 return -1;
404 }
405 if ((buf = malloc(flen+1)) == NULL) {
406 SYSERROR("Out of memory");
407 fclose(f);
408 return -1;
409 }
410 if (fread(buf, 1, flen, f) != flen) {
411 SYSERROR("Failure reading template");
412 free(buf);
413 fclose(f);
414 return -1;
415 }
416 if (fclose(f) < 0) {
417 SYSERROR("Failre closing template");
418 free(buf);
419 return -1;
420 }
421 buf[flen] = '\0';
422 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
423 free(buf);
424 return ret;
425 }
426 #endif
427
/*
 * Turn a NULL-terminated list of char * varargs into a NULL-terminated
 * argv-style array.
 *
 * @ap:        argument list; consumed up to and including its NULL terminator
 * @skip:      number of leading slots left NULL for the caller to fill in
 * @do_strdup: when non-zero every argument is duplicated with strdup(),
 *             otherwise the original pointers are stored
 *
 * Returns the new array (caller frees it, plus the strings if @do_strdup was
 * set) or NULL on allocation failure.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	char **result;

	/* First pass over a copy to size the array once. */
	va_copy(ap2, ap);
	while (va_arg(ap2, char *))
		count++;
	va_end(ap2);

	result = calloc(count, sizeof(char *));
	if (!result)
		return NULL;

	count = skip;
	for (;;) {
		char *arg = va_arg(ap, char *);

		if (!arg)
			break;

		if (do_strdup) {
			arg = strdup(arg);
			if (!arg)
				goto oom;
		}

		result[count++] = arg;
	}

	/* calloc has already set the last element to NULL. */
	return result;

oom:
	/*
	 * This label is only reached when do_strdup is set, so every slot from
	 * @skip up to count holds a copy owned by us; release those too.
	 */
	while (count > skip)
		free(result[--count]);
	free(result);
	return NULL;
}
467
/*
 * Same as lxc_va_arg_list_to_argv() without duplicating the strings; the
 * result is returned as an array of const pointers.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	char **argv = lxc_va_arg_list_to_argv(ap, skip, 0);

	return (const char **)argv;
}
472
473 extern struct lxc_popen_FILE *lxc_popen(const char *command)
474 {
475 struct lxc_popen_FILE *fp = NULL;
476 int parent_end = -1, child_end = -1;
477 int pipe_fds[2];
478 pid_t child_pid;
479
480 int r = pipe2(pipe_fds, O_CLOEXEC);
481
482 if (r < 0) {
483 ERROR("pipe2 failure");
484 return NULL;
485 }
486
487 parent_end = pipe_fds[0];
488 child_end = pipe_fds[1];
489
490 child_pid = fork();
491
492 if (child_pid == 0) {
493 /* child */
494 int child_std_end = STDOUT_FILENO;
495
496 close(parent_end);
497
498 if (child_end != child_std_end) {
499 /* dup2() doesn't dup close-on-exec flag */
500 dup2(child_end, child_std_end);
501
502 /* it's safe not to close child_end here
503 * as it's marked close-on-exec anyway
504 */
505 } else {
506 /*
507 * The descriptor is already the one we will use.
508 * But it must not be marked close-on-exec.
509 * Undo the effects.
510 */
511 if (fcntl(child_end, F_SETFD, 0) != 0) {
512 SYSERROR("Failed to remove FD_CLOEXEC from fd.");
513 exit(127);
514 }
515 }
516
517 /*
518 * Unblock signals.
519 * This is the main/only reason
520 * why we do our lousy popen() emulation.
521 */
522 {
523 sigset_t mask;
524 sigfillset(&mask);
525 sigprocmask(SIG_UNBLOCK, &mask, NULL);
526 }
527
528 execl("/bin/sh", "sh", "-c", command, (char *) NULL);
529 exit(127);
530 }
531
532 /* parent */
533
534 close(child_end);
535
536 if (child_pid < 0) {
537 ERROR("fork failure");
538 goto error;
539 }
540
541 fp = calloc(1, sizeof(*fp));
542 if (!fp) {
543 ERROR("failed to allocate memory");
544 goto error;
545 }
546
547 fp->f = fdopen(parent_end, "r");
548 if (!fp->f) {
549 ERROR("fdopen failure");
550 goto error;
551 }
552
553 fp->child_pid = child_pid;
554
555 return fp;
556
557 error:
558
559 if (fp) {
560 if (fp->f) {
561 fclose(fp->f);
562 parent_end = -1; /* so we do not close it second time */
563 }
564
565 free(fp);
566 }
567
568 if (parent_end != -1)
569 close(parent_end);
570
571 return NULL;
572 }
573
574 extern int lxc_pclose(struct lxc_popen_FILE *fp)
575 {
576 FILE *f = NULL;
577 pid_t child_pid = 0;
578 int wstatus = 0;
579 pid_t wait_pid;
580
581 if (fp) {
582 f = fp->f;
583 child_pid = fp->child_pid;
584 /* free memory (we still need to close file stream) */
585 free(fp);
586 fp = NULL;
587 }
588
589 if (!f || fclose(f)) {
590 ERROR("fclose failure");
591 return -1;
592 }
593
594 do {
595 wait_pid = waitpid(child_pid, &wstatus, 0);
596 } while (wait_pid == -1 && errno == EINTR);
597
598 if (wait_pid == -1) {
599 ERROR("waitpid failure");
600 return -1;
601 }
602
603 return wstatus;
604 }
605
/*
 * Return a newly allocated copy of @haystack in which every non-overlapping
 * occurrence of @needle (scanning left to right) is replaced by @replacement.
 *
 * An empty @needle matches nothing, so the result is a plain copy of
 * @haystack. Returns NULL on allocation failure or if the result length would
 * overflow. The caller frees the result.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t result_len = strlen(haystack);
	size_t matches = 0;
	const char *cur, *hit;
	char *result, *out;

	/* First pass: count matches. An empty needle would never advance. */
	if (needle_len > 0) {
		for (cur = haystack; (hit = strstr(cur, needle)); cur = hit + needle_len)
			matches++;
	}

	/* Every match trades needle_len bytes for replacement_len bytes. */
	result_len -= matches * needle_len;
	if (replacement_len > 0 &&
	    matches > (SIZE_MAX - 1 - result_len) / replacement_len)
		return NULL;
	result_len += matches * replacement_len;

	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Second pass: copy the gaps and the replacements. */
	out = result;
	cur = haystack;
	if (matches > 0) {
		while ((hit = strstr(cur, needle))) {
			size_t gap = (size_t)(hit - cur);

			memcpy(out, cur, gap);
			out += gap;
			memcpy(out, replacement, replacement_len);
			out += replacement_len;
			cur = hit + needle_len;
		}
	}

	/* Remaining tail, including the terminating NUL. */
	strcpy(out, cur);
	return result;
}
660
/*
 * Tell whether @needle equals one of the strings in the NULL-terminated
 * array @haystack. A NULL @haystack contains nothing.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	const char **entry;

	if (haystack == NULL)
		return false;

	for (entry = haystack; *entry != NULL; entry++) {
		if (strcmp(needle, *entry) == 0)
			return true;
	}

	return false;
}
668
/*
 * Concatenate the strings of the NULL-terminated array @parts, putting @sep
 * between neighbours and, when @use_as_prefix is set, also in front of the
 * result. A NULL @parts is treated like an empty array.
 *
 * Returns a newly allocated string the caller must free, or NULL on
 * allocation failure.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;
	const char **p;
	char *result, *out;

	/* calculate new string length */
	for (p = parts; p && *p; p++) {
		if (p > parts)
			result_len += sep_len;
		result_len += strlen(*p);
	}

	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Append through a moving cursor instead of rescanning with strcat(). */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}

	for (p = parts; p && *p; p++) {
		size_t part_len = strlen(*p);

		if (p > parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}
		memcpy(out, *p, part_len);
		out += part_len;
	}
	*out = '\0';

	return result;
}
694
/*
 * Split @path at '/' and resolve "." and ".." components.
 *
 * "." is dropped, ".." removes itself together with the component before it,
 * and a ".." in first position is simply dropped.
 *
 * Returns a NULL-terminated array of the remaining components (free with
 * lxc_free_array()) or NULL if splitting failed.
 */
char **lxc_normalize_path(const char *path)
{
	char **components;
	size_t count = 0;
	size_t pos = 0;

	components = lxc_string_split(path, '/');
	if (!components)
		return NULL;

	while (components[count])
		count++;

	while (pos < count) {
		bool is_dot = strcmp(components[pos], ".") == 0;
		bool is_dotdot = strcmp(components[pos], "..") == 0;
		/* Pointers after this entry, including the NULL terminator. */
		size_t tail = count - pos;

		if (is_dot || (is_dotdot && pos == 0)) {
			/* eat this element */
			free(components[pos]);
			memmove(&components[pos], &components[pos + 1],
				sizeof(char *) * tail);
			count--;
		} else if (is_dotdot) {
			/* eat this and the previous element */
			free(components[pos - 1]);
			free(components[pos]);
			memmove(&components[pos - 1], &components[pos + 1],
				sizeof(char *) * tail);
			count -= 2;
			pos--;
		} else {
			pos++;
		}
	}

	return components;
}
729
/*
 * Return a newly allocated, normalized form of @path: components are
 * resolved by lxc_normalize_path() and re-joined with single '/' separators,
 * with a leading '/' kept when @path starts with one.
 *
 * Returns NULL on allocation failure. The caller frees the result.
 */
char *lxc_deslashify(const char *path)
{
	char *copy, *joined;
	char **parts;

	copy = strdup(path);
	if (!copy)
		return NULL;

	parts = lxc_normalize_path(copy);
	if (!parts) {
		free(copy);
		return NULL;
	}

	/* No components left, e.g. path == "///" or path == "". */
	if (parts[0] == NULL) {
		size_t len = strlen(copy);

		if (len == 0) {
			lxc_free_array((void **)parts, free);
			return copy;
		}

		/*
		 * NOTE(review): strcspn() equals len only when the path holds
		 * no '/' at all (e.g. "."); "///" falls through to the join
		 * below, which also yields "/". Confirm whether strspn() was
		 * intended here.
		 */
		if (strcspn(copy, "/") == len) {
			free(copy);
			lxc_free_array((void **)parts, free);
			return strdup("/");
		}
	}

	joined = lxc_string_join("/", (const char **)parts, *copy == '/');
	free(copy);
	lxc_free_array((void **)parts, free);
	return joined;
}
771
/*
 * Return a newly allocated "@first/@second". The '/' is only inserted when
 * @second does not already start with one.
 *
 * Returns NULL on allocation failure. The caller frees the result.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	int need_sep = second[0] != '/';
	size_t len = strlen(first) + strlen(second) + 1 + (need_sep ? 1 : 0);
	char *result;

	result = calloc(1, len);
	if (!result)
		return NULL;

	if (need_sep)
		snprintf(result, len, "%s/%s", first, second);
	else
		snprintf(result, len, "%s%s", first, second);

	return result;
}
790
/*
 * Tell whether @needle is one of the @_sep-separated tokens of @haystack.
 *
 * Runs of separators are skipped, so empty tokens never exist and an empty
 * @needle never matches. The haystack is scanned in place; no copy is made.
 * Returns false when @haystack or @needle is NULL.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	char sep[2] = { _sep, '\0' };
	const char *cur;
	size_t needle_len;

	if (!haystack || !needle)
		return false;

	needle_len = strlen(needle);
	cur = haystack;
	for (;;) {
		size_t token_len;

		cur += strspn(cur, sep);
		token_len = strcspn(cur, sep);
		if (token_len == 0)
			break;

		if (token_len == needle_len && memcmp(cur, needle, token_len) == 0)
			return true;

		cur += token_len;
	}

	return false;
}
808
/*
 * Split @string at every @_sep into a NULL-terminated array of newly
 * allocated tokens. Runs of separators are skipped, so no empty tokens are
 * produced. A NULL @string yields an empty (NULL-only) array.
 *
 * The input is scanned in place rather than copied onto the stack.
 *
 * Returns the array (free with lxc_free_array()) or NULL on allocation
 * failure, with errno preserved from the failing call.
 */
char **lxc_string_split(const char *string, char _sep)
{
	char sep[2] = {_sep, '\0'};
	char **tmp = NULL, **result = NULL;
	size_t result_capacity = 0;
	size_t result_count = 0;
	const char *cur;
	int r, saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	cur = string;
	for (;;) {
		size_t token_len;

		cur += strspn(cur, sep);
		token_len = strcspn(cur, sep);
		if (token_len == 0)
			break;

		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;

		result[result_count] = strndup(cur, token_len);
		if (!result[result_count])
			goto error_out;
		result_count++;

		cur += token_len;
	}

	/* if we allocated too much, reduce it */
	tmp = realloc(result, (result_count + 1) * sizeof(char *));
	if (!tmp)
		goto error_out;
	result = tmp;

	/* Make sure we don't return uninitialized memory. */
	if (result_count == 0)
		*result = NULL;
	return result;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
848
/*
 * Like lxc_string_split(), but leading and trailing spaces and tabs are
 * stripped from every token. A token consisting only of blanks is kept as an
 * empty string. A NULL @string yields an empty (NULL-only) array.
 *
 * Returns a NULL-terminated array (free with lxc_free_array()) or NULL on
 * allocation failure, with errno preserved from the failing call.
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char sep[2] = { _sep, '\0' };
	char **tmp = NULL, **result = NULL;
	size_t result_capacity = 0;
	size_t result_count = 0;
	const char *cur;
	int r, saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	cur = string;
	for (;;) {
		const char *start, *end;
		size_t token_len;

		cur += strspn(cur, sep);
		token_len = strcspn(cur, sep);
		if (token_len == 0)
			break;

		/* Trim blanks on both sides of [start, end). */
		start = cur;
		end = cur + token_len;
		while (start < end && (*start == ' ' || *start == '\t'))
			start++;
		while (end > start && (end[-1] == ' ' || end[-1] == '\t'))
			end--;

		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;

		result[result_count] = strndup(start, (size_t)(end - start));
		if (!result[result_count])
			goto error_out;
		result_count++;

		cur += token_len;
	}

	/* if we allocated too much, reduce it */
	tmp = realloc(result, (result_count + 1) * sizeof(char *));
	if (!tmp)
		goto error_out;
	result = tmp;

	/* With no tokens the block is fresh from realloc(); terminate it. */
	if (result_count == 0)
		*result = NULL;
	return result;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
889
890 void lxc_free_array(void **array, lxc_free_fn element_free_fn)
891 {
892 void **p;
893 for (p = array; p && *p; p++)
894 element_free_fn(*p);
895 free((void*)array);
896 }
897
/*
 * Make sure *@array can hold @new_size elements plus a NULL terminator,
 * growing *@capacity in steps of @capacity_increment. Newly added slots are
 * zeroed.
 *
 * If either *@array or *@capacity is unset, both are reset first.
 *
 * Returns 0 on success. Returns -1 and leaves *@array / *@capacity as they
 * are if the array must grow but @capacity_increment is 0 (errno = EINVAL),
 * the size computation would overflow (errno = ENOMEM), or realloc() fails.
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	new_capacity = *capacity;

	/* array has sufficient elements */
	if (new_size < new_capacity)
		return 0;

	/* A zero increment could never reach the target size. */
	if (capacity_increment == 0) {
		errno = EINVAL;
		return -1;
	}

	if (new_size == SIZE_MAX) {
		errno = ENOMEM;
		return -1;
	}

	while (new_capacity <= new_size) {
		if (new_capacity > SIZE_MAX - capacity_increment) {
			errno = ENOMEM;
			return -1;
		}
		new_capacity += capacity_increment;
	}

	if (new_capacity > SIZE_MAX / sizeof(void *)) {
		errno = ENOMEM;
		return -1;
	}

	/* we have to reallocate */
	new_array = realloc(*array, new_capacity * sizeof(void *));
	if (!new_array)
		return -1;

	memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
	*array = new_array;
	*capacity = new_capacity;

	return 0;
}
926
/*
 * Count the elements of the NULL-terminated @array, not including the
 * terminator. A NULL @array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t len = 0;

	if (!array)
		return 0;

	while (array[len])
		len++;

	return len;
}
937
/*
 * Create or truncate @filename and write @count bytes from @buf into it,
 * followed by a newline when @add_newline is set.
 *
 * Returns 0 on success, -1 if the file cannot be opened or if any write fails
 * or is short. errno is the same after the descriptor has been closed as it
 * was when the failure was detected.
 */
int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
{
	int fd, saved_errno;
	ssize_t written;

	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
	if (fd < 0)
		return -1;

	written = lxc_write_nointr(fd, buf, count);
	if (written < 0 || (size_t)written != count)
		goto out_error;

	if (add_newline && lxc_write_nointr(fd, "\n", 1) != 1)
		goto out_error;

	close(fd);
	return 0;

out_error:
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
965
/*
 * Read from @filename.
 *
 * With a usable buffer (@buf non-NULL and @count non-zero) the buffer is
 * zeroed and a single read of up to @count bytes is done. Otherwise the file
 * is read to the end into a scratch buffer and only its size is counted.
 *
 * Returns the number of bytes read (or counted), or -1 on failure with errno
 * set by the failing open/read. Reads interrupted by a signal are retried.
 */
int lxc_read_from_file(const char *filename, void* buf, size_t count)
{
	int fd, saved_errno;
	ssize_t ret;

	fd = open(filename, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -1;

	if (!buf || !count) {
		char scratch[100];
		size_t total = 0;

		while ((ret = lxc_read_nointr(fd, scratch, sizeof(scratch))) > 0)
			total += ret;
		if (ret >= 0)
			ret = total;
	} else {
		memset(buf, 0, count);
		ret = lxc_read_nointr(fd, buf, count);
	}

	/* Capture errno before logging or close() can overwrite it. */
	saved_errno = errno;
	if (ret < 0)
		ERROR("read %s: %s", filename, strerror(saved_errno));

	close(fd);
	errno = saved_errno;
	return ret;
}
995
/*
 * Grow @array, which holds @count elements, by one slot and store NULL in it.
 *
 * With @count == 0 the array is returned untouched. If the reallocation
 * fails, all @count elements and the array itself are freed and NULL is
 * returned.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **grown;
	size_t i;

	if (count == 0)
		return array;

	grown = realloc(array, (count + 1) * sizeof(*array));
	if (grown) {
		grown[count] = NULL;
		return grown;
	}

	for (i = 0; i < count; i++)
		free(array[i]);
	free(array);
	return NULL;
}
1015
/*
 * Produce a seed for rand(): read from /dev/urandom when possible, otherwise
 * fall back to time + pid. When @srand_it is set the seed is also passed to
 * srand().
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom;

	urandom = fopen("/dev/urandom", "r");
	if (urandom) {
		int nread = fread(&seed, sizeof(seed), 1, urandom);

		if (nread != 1)
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
1037
/*
 * Translate the uid @orig through /proc/self/uid_map: find the first line
 * whose host range contains @orig and return the matching namespace uid.
 *
 * Returns 0 if the map cannot be opened or no line matches.
 */
uid_t get_ns_uid(uid_t orig)
{
	uid_t nsid, hostid, range;
	uid_t mapped = 0;
	char *line = NULL;
	size_t cap = 0;
	FILE *map;

	map = fopen("/proc/self/uid_map", "r");
	if (!map)
		return 0;

	while (getline(&line, &cap, map) != -1) {
		if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
			continue;

		if (hostid <= orig && hostid + range > orig) {
			mapped = nsid + (orig - hostid);
			break;
		}
	}

	fclose(map);
	free(line);
	return mapped;
}
1062
/*
 * Tell whether @path resolves (following symlinks) to a directory. Any
 * stat() failure, whatever its cause, simply counts as "no".
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode);
}
1074
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 *
 * Folds the @len bytes at @buf into the running hash @hval: each byte is
 * xor'ed in and the hash is then multiplied by the 64 bit FNV prime.
 * Returns the updated hash.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64 */
		hval *= 0x100000001b3ULL;
	}

	return hval;
}
1098
1099 /*
1100 * Detect whether / is mounted MS_SHARED. The only way I know of to
1101 * check that is through /proc/self/mountinfo.
1102 * I'm only checking for /. If the container rootfs or mount location
1103 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1104 * out would be too much work to be worth it.
1105 */
1106 int detect_shared_rootfs(void)
1107 {
1108 char buf[LXC_LINELEN], *p;
1109 FILE *f;
1110 int i;
1111 char *p2;
1112
1113 f = fopen("/proc/self/mountinfo", "r");
1114 if (!f)
1115 return 0;
1116 while (fgets(buf, LXC_LINELEN, f)) {
1117 for (p = buf, i = 0; p && i < 4; i++)
1118 p = strchr(p + 1, ' ');
1119 if (!p)
1120 continue;
1121 p2 = strchr(p + 1, ' ');
1122 if (!p2)
1123 continue;
1124 *p2 = '\0';
1125 if (strcmp(p + 1, "/") == 0) {
1126 /* This is '/'. Is it shared? */
1127 p = strchr(p2 + 1, ' ');
1128 if (p && strstr(p, "shared:")) {
1129 fclose(f);
1130 return 1;
1131 }
1132 }
1133 }
1134 fclose(f);
1135 return 0;
1136 }
1137
/*
 * Join the namespace @ns (the file name under /proc/<pid>/ns/) of process
 * @pid via setns().
 *
 * Returns true on success, false if the path is too long, cannot be opened,
 * or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns) {
	char nspath[MAXPATHLEN];
	bool switched;
	int fd, len;

	/* Switch to new ns */
	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	fd = open(nspath, O_RDONLY);
	if (fd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	switched = setns(fd, 0) == 0;
	if (!switched)
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, fd);

	close(fd);
	return switched;
}
1162
/*
 * looking at fs/proc_namespace.c, it appears we can
 * actually expect the rootfs entry to very specifically contain
 * " - rootfs rootfs "
 * IIUC, so long as we've chrooted so that rootfs is not our root,
 * the rootfs entry should always be skipped in mountinfo contents.
 *
 * Returns true if a /proc/self/mountinfo line for "/" has "- rootfs rootfs "
 * starting at the first '-' after its mount point; false otherwise or if
 * mountinfo cannot be opened.
 */
bool detect_ramfs_rootfs(void)
{
	bool is_ramfs = false;
	char *line = NULL;
	size_t cap = 0;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		return false;

	while (getline(&line, &cap, f) != -1) {
		char *mnt = line, *mnt_end, *dash;
		int i;

		/* Advance to the space in front of the 5th field. */
		for (i = 0; mnt && i < 4; i++)
			mnt = strchr(mnt + 1, ' ');
		if (!mnt)
			continue;

		mnt_end = strchr(mnt + 1, ' ');
		if (!mnt_end)
			continue;
		*mnt_end = '\0';

		if (strcmp(mnt + 1, "/") != 0)
			continue;

		/* This is '/'. Is it the ramfs? */
		dash = strchr(mnt_end + 1, '-');
		if (dash && strncmp(dash, "- rootfs rootfs ", 16) == 0) {
			is_ramfs = true;
			break;
		}
	}

	free(line);
	fclose(f);
	return is_ramfs;
}
1205
/*
 * Look for an executable called @cmd in the directories listed in $PATH.
 * When @rootfs is given, every directory is looked up below @rootfs.
 *
 * Returns a newly allocated path to the first hit (including the @rootfs
 * prefix, if any), or NULL when $PATH is unset, nothing executable is found,
 * or memory runs out. Candidates too long for MAXPATHLEN are skipped.
 */
char *on_path(const char *cmd, const char *rootfs) {
	char cmdpath[MAXPATHLEN];
	char *path_copy, *dir;
	char *saveptr = NULL;
	char *found = NULL;
	const char *env;
	int len;

	env = getenv("PATH");
	if (!env)
		return NULL;

	/* strtok_r() modifies its input, so work on a private copy. */
	path_copy = strdup(env);
	if (!path_copy)
		return NULL;

	for (dir = strtok_r(path_copy, ":", &saveptr); dir;
	     dir = strtok_r(NULL, ":", &saveptr)) {
		if (rootfs)
			len = snprintf(cmdpath, MAXPATHLEN, "%s/%s/%s", rootfs, dir, cmd);
		else
			len = snprintf(cmdpath, MAXPATHLEN, "%s/%s", dir, cmd);

		if (len < 0 || len >= MAXPATHLEN)
			continue;

		if (access(cmdpath, X_OK) == 0) {
			found = strdup(cmdpath);
			break;
		}
	}

	free(path_copy);
	return found;
}
1243
/* Tell whether stat() succeeds on @f, i.e. whether the path exists. */
bool file_exists(const char *f)
{
	struct stat sb;
	int rc = stat(f, &sb);

	return rc == 0;
}
1250
/* Cgroup namespaces count as supported when /proc/self/ns/cgroup exists. */
bool cgns_supported(void)
{
	static const char cgroup_ns[] = "/proc/self/ns/cgroup";

	return file_exists(cgroup_ns);
}
1255
1256 /* historically lxc-init has been under /usr/lib/lxc and under
1257 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1258 */
1259 char *choose_init(const char *rootfs)
1260 {
1261 char *retv = NULL;
1262 const char *empty = "",
1263 *tmp;
1264 int ret, env_set = 0;
1265
1266 if (!getenv("PATH")) {
1267 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1268 SYSERROR("Failed to setenv");
1269 env_set = 1;
1270 }
1271
1272 retv = on_path("init.lxc", rootfs);
1273
1274 if (env_set) {
1275 if (unsetenv("PATH"))
1276 SYSERROR("Failed to unsetenv");
1277 }
1278
1279 if (retv)
1280 return retv;
1281
1282 retv = malloc(PATH_MAX);
1283 if (!retv)
1284 return NULL;
1285
1286 if (rootfs)
1287 tmp = rootfs;
1288 else
1289 tmp = empty;
1290
1291 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
1292 if (ret < 0 || ret >= PATH_MAX) {
1293 ERROR("pathname too long");
1294 goto out1;
1295 }
1296 if (access(retv, X_OK) == 0)
1297 return retv;
1298
1299 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
1300 if (ret < 0 || ret >= PATH_MAX) {
1301 ERROR("pathname too long");
1302 goto out1;
1303 }
1304 if (access(retv, X_OK) == 0)
1305 return retv;
1306
1307 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
1308 if (ret < 0 || ret >= PATH_MAX) {
1309 ERROR("pathname too long");
1310 goto out1;
1311 }
1312 if (access(retv, X_OK) == 0)
1313 return retv;
1314
1315 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
1316 if (ret < 0 || ret >= PATH_MAX) {
1317 ERROR("pathname too long");
1318 goto out1;
1319 }
1320 if (access(retv, X_OK) == 0)
1321 return retv;
1322
1323 /*
1324 * Last resort, look for the statically compiled init.lxc which we
1325 * hopefully bind-mounted in.
1326 * If we are called during container setup, and we get to this point,
1327 * then the init.lxc.static from the host will need to be bind-mounted
1328 * in. So we return NULL here to indicate that.
1329 */
1330 if (rootfs)
1331 goto out1;
1332
1333 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
1334 if (ret < 0 || ret >= PATH_MAX) {
1335 WARN("Nonsense - name /lxc.init.static too long");
1336 goto out1;
1337 }
1338 if (access(retv, X_OK) == 0)
1339 return retv;
1340
1341 out1:
1342 free(retv);
1343 return NULL;
1344 }
1345
/*
 * Create or truncate @file and write the string @content to it.
 *
 * Returns 0 on success, -1 if the file cannot be opened or the data cannot
 * be written. Because stdio buffers the data, a write error may only show up
 * when the stream is flushed on close, so the fclose() result is checked too.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int ret = 0;

	f = fopen(file, "w");
	if (!f)
		return -1;

	if (fputs(content, f) == EOF)
		ret = -1;

	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
1359
/*
 * Returns 1 if @path resolves (following symlinks) to a directory, 0 if it
 * does not or if stat() fails.
 */
int is_dir(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) ? 1 : 0;
}
1368
1369 /*
1370 * Given the '-t' template option to lxc-create, figure out what to
1371 * do. If the template is a full executable path, use that. If it
1372 * is something like 'sshd', then return $templatepath/lxc-sshd.
1373 * On success return the template, on error return NULL.
1374 */
1375 char *get_template_path(const char *t)
1376 {
1377 int ret, len;
1378 char *tpath;
1379
1380 if (t[0] == '/' && access(t, X_OK) == 0) {
1381 tpath = strdup(t);
1382 return tpath;
1383 }
1384
1385 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
1386 tpath = malloc(len);
1387 if (!tpath)
1388 return NULL;
1389 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
1390 if (ret < 0 || ret >= len) {
1391 free(tpath);
1392 return NULL;
1393 }
1394 if (access(tpath, X_OK) < 0) {
1395 SYSERROR("bad template: %s", t);
1396 free(tpath);
1397 return NULL;
1398 }
1399
1400 return tpath;
1401 }
1402
/*
 * Sets the process title to the specified title. Note that this may fail if
 * the kernel doesn't support PR_SET_MM_MAP (kernels <3.18).
 *
 * The memory layout PR_SET_MM_MAP asks for is read back from /proc/self/stat;
 * only the argument area is replaced, by a heap buffer that receives @title.
 *
 * Returns 0 on success, -1 if /proc/self/stat cannot be read or parsed or the
 * buffer cannot be allocated, and the return value of prctl() otherwise.
 */
int setproctitle(char *title)
{
	static char *proctitle = NULL;
	char buf[2048], *tmp, *new_title;
	FILE *f;
	int i, len, ret = 0;

	/* We don't really need to know all of this stuff, but unfortunately
	 * PR_SET_MM_MAP requires us to set it all at once, so we have to
	 * figure it out anyway.
	 */
	unsigned long start_data, end_data, start_brk, start_code, end_code,
			start_stack, arg_start, arg_end, env_start, env_end,
			brk_val;
	struct prctl_mm_map prctl_map;

	f = fopen_cloexec("/proc/self/stat", "r");
	if (!f)
		return -1;

	tmp = fgets(buf, sizeof(buf), f);
	fclose(f);
	if (!tmp)
		return -1;

	/* Field 2 (comm) is wrapped in parentheses and may itself contain
	 * spaces, so resume counting after the last ')' instead of trusting
	 * the first space in the line.
	 */
	tmp = strrchr(buf, ')');
	if (!tmp)
		return -1;

	/* tmp now sits on the space in front of field 3. Advance 23 more
	 * separators so that it precedes field 26; columns 26-28 are
	 * start_code, end_code and start_stack.
	 */
	tmp = strchr(tmp, ' ');
	for (i = 0; i < 23; i++) {
		if (!tmp)
			return -1;
		tmp = strchr(tmp + 1, ' ');
	}
	if (!tmp)
		return -1;

	i = sscanf(tmp, "%lu %lu %lu", &start_code, &end_code, &start_stack);
	if (i != 3)
		return -1;

	/* Skip the next 19 fields, column 45-51 are start_data to arg_end */
	for (i = 0; i < 19; i++) {
		if (!tmp)
			return -1;
		tmp = strchr(tmp + 1, ' ');
	}
	if (!tmp)
		return -1;

	/* arg_start and arg_end (fields 48-49) are skipped: they get replaced. */
	i = sscanf(tmp, "%lu %lu %lu %*u %*u %lu %lu",
		   &start_data,
		   &end_data,
		   &start_brk,
		   &env_start,
		   &env_end);
	if (i != 5)
		return -1;

	/* Include the null byte here, because in the calculations below we
	 * want to have room for it. */
	len = strlen(title) + 1;

	/* Keep the previous buffer if growing it fails: it may still be what
	 * an earlier successful call registered as the argument area.
	 */
	new_title = realloc(proctitle, len);
	if (!new_title)
		return -1;
	proctitle = new_title;

	arg_start = (unsigned long) proctitle;
	arg_end = arg_start + len;

	brk_val = syscall(__NR_brk, 0);

	prctl_map = (struct prctl_mm_map) {
		.start_code = start_code,
		.end_code = end_code,
		.start_stack = start_stack,
		.start_data = start_data,
		.end_data = end_data,
		.start_brk = start_brk,
		.brk = brk_val,
		.arg_start = arg_start,
		.arg_end = arg_end,
		.env_start = env_start,
		.env_end = env_end,
		.auxv = NULL,
		.auxv_size = 0,
		.exe_fd = -1,
	};

	ret = prctl(PR_SET_MM, PR_SET_MM_MAP, (long) &prctl_map, sizeof(prctl_map), 0);
	if (ret == 0)
		strcpy((char*)arg_start, title);
	else
		INFO("setting cmdline failed - %s", strerror(errno));

	return ret;
}
1506
/*
 * Step to the next path segment.
 *
 * @path:    a pathname in which every '/' has been replaced with '\0'.
 * @offsetp: position inside @path of the segment seen last; updated on
 *           return to the position of the next segment.
 * @fulllen: length of the original, untokenized path.
 *
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int pos = *offsetp;

	if (pos >= fulllen)
		return NULL;

	/* Run to the end of the current segment ... */
	for (; pos < fulllen && path[pos] != '\0'; pos++)
		;
	/* ... and then over the separator(s) that follow it. */
	for (; pos < fulllen && path[pos] == '\0'; pos++)
		;

	*offsetp = pos;
	if (pos >= fulllen)
		return NULL;

	return &path[pos];
}
1529
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * @subdir counts as a subdir when it starts with @dir and the match ends on
 * a path boundary: @dir ends in '/', or @subdir continues with '/' or ends
 * right there.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	if (subdirlen < len)
		return false;
	if (strncmp(subdir, dir, len) != 0)
		return false;
	/* An empty @dir has no last character to look at. */
	if (len > 0 && dir[len - 1] == '/')
		return true;

	return subdir[len] == '/' || subdirlen == len;
}
1548
/*
 * Inspect the open file descriptor @fd.
 *
 * Returns -ENOENT if fstat() fails, -ELOOP if the descriptor refers to a
 * symbolic link and 0 if the fd is ok.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
1563
/*
 * Open @nextpath relative to @dirfd, refusing to follow a symlink.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns the new fd on success and a negative value on failure.
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int fd, rc;

	fd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
	if (fd >= 0) /* Was not a symlink, all good. */
		return fd;

	/* Only a permission problem is worth a second attempt; everything
	 * else, ELOOP included, is handed back to the caller as is.
	 */
	if (errno != EPERM && errno != EACCES)
		return fd;

	/* We're not root (cause we got EPERM) so try opening with O_PATH. */
	fd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (fd < 0)
		return fd;

	/* O_PATH will return an fd for symlinks. We know nextpath wasn't a
	 * symlink at last openat, so if fd is now a link, then something
	 * fishy is going on.
	 */
	rc = check_symlink(fd);
	if (rc < 0) {
		close(fd);
		return rc;
	}

	return fd;
}
1599
/*
 * Open a path intending for mounting, ensuring that the final path
 * is inside the container's rootfs.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target: path to be opened
 * @prefix_skip: a part of @target in which to ignore symbolic links. This
 * would be the container's rootfs.
 *
 * The part of @target after @prefix_skip is walked one component at a time
 * with open_if_safe(), so a symlink in any of those components makes the
 * walk fail.
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int offset = 0, curfd, targetlen, idx;
	char *segments = NULL;

	targetlen = strlen(target);

	if (!prefix_skip || strlen(prefix_skip) == 0) {
		prefix_skip = "/";
		offset = 0;
	} else {
		/* make sure prefix-skip makes sense */
		offset = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, offset)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
			      target, prefix_skip);
			return -EINVAL;
		}
		/*
		 * get_nextpath() expects its offset to be on a (turned
		 * into \0) / or before it, so step back by one to make
		 * sure that happens
		 */
		if (offset)
			offset--;
	}

	/* Make a copy of target which we can hack up, and tokenize it */
	segments = strdup(target);
	if (!segments) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}
	for (idx = 0; idx < targetlen; idx++)
		if (segments[idx] == '/')
			segments[idx] = '\0';

	/* Descend component by component until the path is used up or one
	 * of the components cannot be opened safely.
	 */
	curfd = open(prefix_skip, O_RDONLY);
	while (curfd >= 0) {
		int nextfd, saved_errno;
		char *component;

		component = get_nextpath(segments, &offset, targetlen);
		if (!component)
			break;

		nextfd = open_if_safe(curfd, component);
		saved_errno = errno;
		close(curfd);
		curfd = nextfd;
		if (nextfd < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);
		}
	}

	free(segments);
	return curfd;
}
1675
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * The destination (and, for a bind mount with a relative source, the source
 * too) is opened through open_without_symlink() and then passed to mount()
 * as /proc/self/fd/<fd>.
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int destfd, ret, saved_errno;
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	int srcfd = -1;
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("this is a relative bind mount");
		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		/* A return value equal to the buffer size already means
		 * the output was truncated.
		 */
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}
		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);
		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	/* close() below may overwrite errno before it gets logged. */
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);
	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		SYSERROR("Failed to mount %s onto %s", src, dest);
		return ret;
	}

	return 0;
}
1743
1744 /*
1745 * Mount a proc under @rootfs if proc self points to a pid other than
1746 * my own. This is needed to have a known-good proc mount for setting
1747 * up LSMs both at container startup and attach.
1748 *
1749 * @rootfs : the rootfs where proc should be mounted
1750 *
1751 * Returns < 0 on failure, 0 if the correct proc was already mounted
1752 * and 1 if a new proc was mounted.
1753 *
1754 * NOTE: not to be called from inside the container namespace!
1755 */
1756 int lxc_mount_proc_if_needed(const char *rootfs)
1757 {
1758 char path[MAXPATHLEN];
1759 int link_to_pid, linklen, mypid, ret;
1760 char link[LXC_NUMSTRLEN64] = {0};
1761
1762 ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
1763 if (ret < 0 || ret >= MAXPATHLEN) {
1764 SYSERROR("proc path name too long");
1765 return -1;
1766 }
1767
1768 linklen = readlink(path, link, LXC_NUMSTRLEN64);
1769
1770 ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
1771 if (ret < 0 || ret >= MAXPATHLEN) {
1772 SYSERROR("proc path name too long");
1773 return -1;
1774 }
1775
1776 /* /proc not mounted */
1777 if (linklen < 0) {
1778 if (mkdir(path, 0755) && errno != EEXIST)
1779 return -1;
1780 goto domount;
1781 } else if (linklen >= LXC_NUMSTRLEN64) {
1782 link[linklen - 1] = '\0';
1783 ERROR("readlink returned truncated content: \"%s\"", link);
1784 return -1;
1785 }
1786
1787 mypid = getpid();
1788 INFO("I am %d, /proc/self points to \"%s\"", mypid, link);
1789
1790 if (lxc_safe_int(link, &link_to_pid) < 0)
1791 return -1;
1792
1793 /* correct procfs is already mounted */
1794 if (link_to_pid == mypid)
1795 return 0;
1796
1797 ret = umount2(path, MNT_DETACH);
1798 if (ret < 0)
1799 WARN("failed to umount \"%s\" with MNT_DETACH", path);
1800
1801 domount:
1802 /* rootfs is NULL */
1803 if (!strcmp(rootfs, ""))
1804 ret = mount("proc", path, "proc", 0, NULL);
1805 else
1806 ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);
1807 if (ret < 0)
1808 return -1;
1809
1810 INFO("mounted /proc in container for security transition");
1811 return 1;
1812 }
1813
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the file descriptor, or the negative result of open() after
 * logging the failure.
 */
int open_devnull(void)
{
	int fd;

	fd = open("/dev/null", O_RDWR);
	if (fd >= 0)
		return fd;

	SYSERROR("Can't open /dev/null");
	return fd;
}
1823
/*
 * Duplicate @fd onto stdin, stdout and stderr, in that order.
 *
 * Returns 0 on success and -1 if @fd is negative or any dup2() fails.
 */
int set_stdfds(int fd)
{
	if (fd < 0)
		return -1;

	/* Short-circuit evaluation stops at the first failing dup2(). */
	if (dup2(fd, STDIN_FILENO) < 0 ||
	    dup2(fd, STDOUT_FILENO) < 0 ||
	    dup2(fd, STDERR_FILENO) < 0)
		return -1;

	return 0;
}
1845
/*
 * Point stdin, stdout and stderr at /dev/null.
 *
 * Returns -1 if /dev/null cannot be opened, otherwise the result of
 * set_stdfds().
 */
int null_stdfds(void)
{
	int fd, ret;

	fd = open_devnull();
	if (fd < 0)
		return -1;

	ret = set_stdfds(fd);
	close(fd);

	return ret;
}
1858
/*
 * Count the lines getline() can read from file @fn.
 *
 * Returns the number of lines, or -1 if the file cannot be opened.
 */
int lxc_count_file_lines(const char *fn)
{
	FILE *fp;
	char *buf = NULL;
	size_t cap = 0;
	int count;

	fp = fopen_cloexec(fn, "r");
	if (!fp)
		return -1;

	for (count = 0; getline(&buf, &cap, fp) != -1; count++)
		;

	free(buf);
	fclose(fp);

	return count;
}
1880
/*
 * mmap() @length bytes of @fd so that the mapping is followed by a \0-byte
 * and can be handed to normal string-handling functions.
 *
 * Returns the address of the mapping, or MAP_FAILED.
 */
void *lxc_strmmap(void *addr, size_t length, int prot, int flags, int fd,
		  off_t offset)
{
	void *anon, *mapped;

	/* Reserve an anonymous mapping that is one byte larger than what is
	 * requested from the file. The pages handed to us are zero filled.
	 */
	anon = mmap(addr, length + 1, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (anon == MAP_FAILED)
		return MAP_FAILED;

	/* Lay the file over the start of that reservation with a
	 * fixed-address mapping, leaving the additional \0-byte behind it
	 * untouched.
	 */
	mapped = mmap(anon, length, prot, MAP_FIXED | flags, fd, offset);
	if (mapped != MAP_FAILED)
		return mapped;

	munmap(anon, length + 1);
	return MAP_FAILED;
}
1902
/*
 * Unmap a mapping of @length bytes at @addr together with one extra
 * trailing byte, matching the length + 1 bytes lxc_strmmap() reserves.
 *
 * Returns the result of munmap().
 */
int lxc_strmunmap(void *addr, size_t length)
{
	size_t mapped_len = length + 1;

	return munmap(addr, mapped_len);
}
1907
1908 /* Check whether a signal is blocked by a process. */
1909 /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
1910 #define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
1911 bool task_blocking_signal(pid_t pid, int signal)
1912 {
1913 bool bret = false;
1914 char *line = NULL;
1915 long unsigned int sigblk = 0;
1916 size_t n = 0;
1917 int ret;
1918 FILE *f;
1919
1920 char status[__PROC_STATUS_LEN];
1921
1922 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1923 if (ret < 0 || ret >= __PROC_STATUS_LEN)
1924 return bret;
1925
1926 f = fopen(status, "r");
1927 if (!f)
1928 return bret;
1929
1930 while (getline(&line, &n, f) != -1) {
1931 if (strncmp(line, "SigBlk:\t", 8))
1932 continue;
1933
1934 if (sscanf(line + 8, "%lx", &sigblk) != 1)
1935 goto out;
1936 }
1937
1938 if (sigblk & (1LU << (signal - 1)))
1939 bret = true;
1940
1941 out:
1942 free(line);
1943 fclose(f);
1944 return bret;
1945 }
1946
/*
 * Grow the NULL-terminated pointer array *@list by one slot.
 *
 * @list may point to NULL, which is treated as an empty list.
 *
 * Returns the index of the new slot, which the caller is expected to fill,
 * or -1 if the reallocation fails (in which case *@list is left as it was).
 */
static int lxc_append_null_to_list(void ***list)
{
	int newentry = 0;
	void **tmp;

	if (*list)
		while ((*list)[newentry])
			newentry++;

	/* Room for the existing entries, the new slot and the terminator. */
	tmp = realloc(*list, (newentry + 2) * sizeof(*tmp));
	if (!tmp)
		return -1;

	*list = tmp;
	/* Clear the new slot as well, so the list stays properly
	 * terminated even if the caller never gets to fill it.
	 */
	(*list)[newentry] = NULL;
	(*list)[newentry + 1] = NULL;

	return newentry;
}
1966
/*
 * Append a copy of @entry to the NULL-terminated string array *@list.
 *
 * Returns 0 on success and -1 if memory cannot be allocated.
 */
int lxc_append_string(char ***list, char *entry)
{
	char *copy;
	int newentry;

	/* Duplicate first: if this fails the list has not been touched and
	 * does not end up with a slot nobody filled in.
	 */
	copy = strdup(entry);
	if (!copy)
		return -1;

	newentry = lxc_append_null_to_list((void ***)list);
	if (newentry < 0) {
		free(copy);
		return -1;
	}

	(*list)[newentry] = copy;

	return 0;
}
1984
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * When @ns is NULL or the empty string the /proc/<pid>/ns directory itself
 * is opened. This way we can use this function to also check whether
 * namespaces are supported by the kernel.
 *
 * Returns the file descriptor, or -1 if the path does not fit the buffer or
 * open() fails.
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
	int len;
	bool have_ns = ns && strcmp(ns, "") != 0;
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char path[__NS_PATH_LEN];

	len = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid,
		       have_ns ? "/" : "",
		       have_ns ? ns : "");
	if (len < 0 || (size_t)len >= __NS_PATH_LEN)
		return -1;

	return open(path, O_RDONLY | O_CLOEXEC);
}
2004
/*
 * Convert the whole string @numstr to an unsigned int.
 *
 * Leading whitespace is skipped, a leading '-' is rejected and the base is
 * detected by strtoul() (base 0).
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is negative, empty or has trailing characters, and -ERANGE if the
 * value does not fit.
 */
int lxc_safe_uint(const char *numstr, unsigned int *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* <ctype.h> functions are only defined for unsigned char values
	 * (and EOF), so a plain char must be converted first.
	 */
	while (isspace((unsigned char)*numstr))
		numstr++;

	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	if (uli > UINT_MAX)
		return -ERANGE;

	*converted = (unsigned int)uli;
	return 0;
}
2030
/*
 * Convert the whole string @numstr to an unsigned long.
 *
 * Leading whitespace is skipped, a leading '-' is rejected and the base is
 * detected by strtoul() (base 0).
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is negative, empty or has trailing characters, and -ERANGE if the
 * value does not fit.
 */
int lxc_safe_ulong(const char *numstr, unsigned long *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* <ctype.h> functions are only defined for unsigned char values
	 * (and EOF), so a plain char must be converted first.
	 */
	while (isspace((unsigned char)*numstr))
		numstr++;

	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	*converted = uli;
	return 0;
}
2053
/*
 * Convert the whole string @numstr to an int, letting strtol() detect the
 * base (base 0).
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not a number or has trailing characters, and -ERANGE if the
 * value does not fit into an int.
 */
int lxc_safe_int(const char *numstr, int *converted)
{
	char *end = NULL;
	long parsed;

	errno = 0;
	parsed = strtol(numstr, &end, 0);

	if (errno == ERANGE && (parsed == LONG_MAX || parsed == LONG_MIN))
		return -ERANGE;

	if ((errno != 0 && parsed == 0) || end == numstr || *end != '\0')
		return -EINVAL;

	if (parsed < INT_MIN || parsed > INT_MAX)
		return -ERANGE;

	*converted = (int)parsed;
	return 0;
}
2076
/*
 * Convert the whole string @numstr to a long, letting strtol() detect the
 * base (base 0).
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not a number or has trailing characters, and -ERANGE if
 * strtol() reports an overflow.
 */
int lxc_safe_long(const char *numstr, long int *converted)
{
	char *end = NULL;
	long parsed;

	errno = 0;
	parsed = strtol(numstr, &end, 0);

	if (errno == ERANGE && (parsed == LONG_MAX || parsed == LONG_MIN))
		return -ERANGE;

	if ((errno != 0 && parsed == 0) || end == numstr || *end != '\0')
		return -EINVAL;

	*converted = parsed;
	return 0;
}
2096
/*
 * Switch to @gid and then to @uid, logging each step.
 *
 * Returns 0 on success and the negated errno of the failing setgid() or
 * setuid() call otherwise.
 */
int lxc_switch_uid_gid(uid_t uid, gid_t gid)
{
	int saved_errno;

	if (setgid(gid) < 0) {
		/* Logging may modify errno; capture it first so that a
		 * failure can never be reported as 0.
		 */
		saved_errno = errno;
		SYSERROR("Failed to switch to gid %d.", gid);
		return -saved_errno;
	}
	NOTICE("Switched to gid %d.", gid);

	if (setuid(uid) < 0) {
		saved_errno = errno;
		SYSERROR("Failed to switch to uid %d.", uid);
		return -saved_errno;
	}
	NOTICE("Switched to uid %d.", uid);

	return 0;
}
2113
/*
 * Simple convenience function which enables uniform logging.
 *
 * Returns 0 on success and the negated errno of setgroups() otherwise.
 */
int lxc_setgroups(int size, gid_t list[])
{
	if (setgroups(size, list) < 0) {
		/* Logging may modify errno; capture it first so that a
		 * failure can never be reported as 0.
		 */
		int saved_errno = errno;

		SYSERROR("Failed to setgroups().");
		return -saved_errno;
	}
	NOTICE("Dropped additional groups.");

	return 0;
}
2125
/*
 * Find an unused loop device by scanning /dev for entries whose name starts
 * with "loop".
 *
 * @loop_name: buffer of at least LO_NAME_SIZE bytes that receives the path
 *             of the device that was picked.
 *
 * Returns an open (O_RDWR) fd for the device, or -1 if none was found.
 */
static int lxc_get_unused_loop_dev_legacy(char *loop_name)
{
	struct dirent *dp;
	struct loop_info64 lo64;
	DIR *dir;
	int dfd, fd = -1, ret;

	dir = opendir("/dev");
	if (!dir)
		return -1;

	dfd = dirfd(dir);
	if (dfd < 0) {
		closedir(dir);
		return -1;
	}

	while ((dp = readdir(dir))) {
		if (strncmp(dp->d_name, "loop", 4) != 0)
			continue;

		fd = openat(dfd, dp->d_name, O_RDWR);
		if (fd < 0)
			continue;

		/* Only a status query that fails with ENXIO marks the device
		 * as free. A query that succeeds means a backing file is
		 * attached, and any other error means we cannot tell.
		 */
		ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
		if (ret == 0 || errno != ENXIO) {
			close(fd);
			fd = -1;
			continue;
		}

		ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
		if (ret < 0 || ret >= LO_NAME_SIZE) {
			close(fd);
			fd = -1;
			continue;
		}

		break;
	}

	closedir(dir);

	if (fd < 0)
		return -1;

	return fd;
}
2179
/*
 * Ask /dev/loop-control for a free loop device and open it.
 *
 * @name_loop: buffer of at least LO_NAME_SIZE bytes that receives the path
 *             of the device.
 *
 * Returns an open fd for the loop device, -ENODEV if /dev/loop-control
 * cannot be opened and -1 on any other failure.
 */
static int lxc_get_unused_loop_dev(char *name_loop)
{
	int ctl_fd, free_nr, len;
	int loop_fd = -1;

	ctl_fd = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
	if (ctl_fd < 0)
		return -ENODEV;

	free_nr = ioctl(ctl_fd, LOOP_CTL_GET_FREE);
	if (free_nr >= 0) {
		len = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", free_nr);
		if (len >= 0 && len < LO_NAME_SIZE)
			loop_fd = open(name_loop, O_RDWR | O_CLOEXEC);
	}

	close(ctl_fd);
	return loop_fd;
}
2205
/*
 * Attach the image @source to an unused loop device.
 *
 * @source:   path of the backing file.
 * @loop_dev: buffer that receives the path of the loop device.
 * @flags:    stored in lo_flags when configuring the device.
 *
 * Returns an open fd for the configured loop device, or a negative value on
 * failure.
 */
int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
{
	int ret;
	struct loop_info64 lo64;
	int fd_img = -1, fret = -1, fd_loop = -1;
	bool attached = false;

	fd_loop = lxc_get_unused_loop_dev(loop_dev);
	if (fd_loop < 0) {
		/* Only a missing /dev/loop-control justifies the scan. */
		if (fd_loop != -ENODEV)
			goto on_error;

		fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev);
		if (fd_loop < 0)
			goto on_error;
	}

	fd_img = open(source, O_RDWR | O_CLOEXEC);
	if (fd_img < 0)
		goto on_error;

	ret = ioctl(fd_loop, LOOP_SET_FD, fd_img);
	if (ret < 0)
		goto on_error;
	attached = true;

	memset(&lo64, 0, sizeof(lo64));
	lo64.lo_flags = flags;

	ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64);
	if (ret < 0)
		goto on_error;

	fret = 0;

on_error:
	if (fd_img >= 0)
		close(fd_img);

	if (fret < 0 && fd_loop >= 0) {
		/* Do not leave the image bound to a device we are about to
		 * report as unusable.
		 */
		if (attached)
			(void)ioctl(fd_loop, LOOP_CLR_FD, 0);
		close(fd_loop);
		fd_loop = -1;
	}

	return fd_loop;
}
2248
/*
 * Unmount @path over and over until nothing is mounted there anymore.
 *
 * @lazy: use MNT_DETACH for every umount2() call.
 *
 * Returns the number of successful unmounts (capped at INT_MAX), or the
 * negated errno if umount2() fails with anything other than EINVAL.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	int count = 0;
	int umount_flags = lazy ? MNT_DETACH : 0;

	/* Every success may have uncovered another mount stacked below, so
	 * keep going. Just stop counting at INT_MAX; that'd be so stupid
	 * that reporting the correct value is not worth bothering with.
	 */
	while (umount2(path, umount_flags) >= 0) {
		if (count != INT_MAX)
			count++;
	}

	/* We consider anything else than EINVAL deadly to prevent going
	 * into an infinite loop. (The other alternative is constantly
	 * parsing /proc/self/mountinfo which is yucky and probably racy.)
	 */
	if (errno != EINVAL)
		return -errno;

	return count;
}
2279
/*
 * Run @child_fn(@args) in a forked child with stdout and stderr redirected
 * into a pipe, and capture what the child prints.
 *
 * @buf, @buf_size: optional buffer for the output. At most @buf_size - 1
 *                  bytes from a single read are stored, always
 *                  \0-terminated, with one trailing newline removed.
 * @child_fn:       expected to exec and therefore not to return; if it does
 *                  return, the child exits with EXIT_FAILURE.
 *
 * Returns -1 if the pipe or the child cannot be created, otherwise the
 * result of wait_for_pid() for the child.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	pid_t child;
	int ret, fret, pipefd[2];
	ssize_t bytes;

	/* Make sure our callers do not receive uninitialized memory. */
	if (buf_size > 0 && buf)
		buf[0] = '\0';

	if (pipe(pipefd) < 0) {
		SYSERROR("failed to create pipe");
		return -1;
	}

	child = fork();
	if (child < 0) {
		close(pipefd[0]);
		close(pipefd[1]);
		SYSERROR("failed to create new process");
		return -1;
	}

	if (child == 0) {
		/* Close the read-end of the pipe. */
		close(pipefd[0]);

		/* Redirect std{err,out} to write-end of the
		 * pipe.
		 */
		ret = dup2(pipefd[1], STDOUT_FILENO);
		if (ret >= 0)
			ret = dup2(pipefd[1], STDERR_FILENO);

		/* Close the write-end of the pipe. */
		close(pipefd[1]);

		if (ret < 0) {
			SYSERROR("failed to duplicate std{err,out} file descriptor");
			exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("failed to exec command");
		exit(EXIT_FAILURE);
	}

	/* close the write-end of the pipe */
	close(pipefd[1]);

	if (buf && buf_size > 0) {
		/* A signal arriving while we wait for output is no reason
		 * to lose the output.
		 */
		do {
			bytes = read(pipefd[0], buf, buf_size - 1);
		} while (bytes < 0 && errno == EINTR);

		if (bytes > 0) {
			/* bytes <= buf_size - 1, so this stays in bounds. */
			buf[bytes] = '\0';
			/* Strip the newline commands usually end their
			 * output with, but keep any other last character.
			 */
			if (buf[bytes - 1] == '\n')
				buf[bytes - 1] = '\0';
		}
	}

	fret = wait_for_pid(child);
	/* close the read-end of the pipe */
	close(pipefd[0]);

	return fret;
}
2343
/*
 * Join @first and the following NULL-terminated list of strings into one
 * newly allocated path. A '/' is inserted in front of every following
 * string that does not itself start with one.
 *
 * Returns the allocated path.
 */
char *must_make_path(const char *first, ...)
{
	va_list ap;
	char *piece, *result;
	size_t total = strlen(first);

	result = must_copy_string(first);

	va_start(ap, first);
	for (piece = va_arg(ap, char *); piece != NULL; piece = va_arg(ap, char *)) {
		bool need_sep = piece[0] != '/';

		total += strlen(piece);
		if (need_sep)
			total++;

		/* + 1 for the terminating \0 */
		result = must_realloc(result, total + 1);
		if (need_sep)
			strcat(result, "/");
		strcat(result, piece);
	}
	va_end(ap);

	return result;
}
2366
/*
 * Duplicate @entry, retrying strdup() until it succeeds.
 *
 * Returns the copy, or NULL if @entry is NULL.
 */
char *must_copy_string(const char *entry)
{
	char *copy = NULL;

	if (!entry)
		return NULL;

	while (!copy)
		copy = strdup(entry);

	return copy;
}
2379
/*
 * realloc() @orig to @sz bytes, retrying until the allocation succeeds.
 *
 * A request for 0 bytes is served as a 1 byte allocation, so the result is
 * never NULL.
 */
void *must_realloc(void *orig, size_t sz)
{
	void *ret;

	/* realloc(ptr, 0) may free ptr and return NULL. The retry loop
	 * would then hand an already freed pointer back to realloc().
	 */
	if (sz == 0)
		sz = 1;

	do {
		ret = realloc(orig, sz);
	} while (!ret);

	return ret;
}
2390
2391 bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val)
2392 {
2393 return (fs->f_type == (fs_type_magic)magic_val);
2394 }
2395
2396 bool has_fs_type(const char *path, fs_type_magic magic_val)
2397 {
2398 bool has_type;
2399 int ret;
2400 struct statfs sb;
2401
2402 ret = statfs(path, &sb);
2403 if (ret < 0)
2404 return false;
2405
2406 has_type = is_fs_type(&sb, magic_val);
2407 if (!has_type && magic_val == RAMFS_MAGIC)
2408 WARN("When the ramfs it a tmpfs statfs() might report tmpfs");
2409
2410 return has_type;
2411 }
2412
2413 bool lxc_nic_exists(char *nic)
2414 {
2415 #define __LXC_SYS_CLASS_NET_LEN 15 + IFNAMSIZ + 1
2416 char path[__LXC_SYS_CLASS_NET_LEN];
2417 int ret;
2418 struct stat sb;
2419
2420 if (!strcmp(nic, "none"))
2421 return true;
2422
2423 ret = snprintf(path, __LXC_SYS_CLASS_NET_LEN, "/sys/class/net/%s", nic);
2424 if (ret < 0 || (size_t)ret >= __LXC_SYS_CLASS_NET_LEN)
2425 return false;
2426
2427 ret = stat(path, &sb);
2428 if (ret < 0)
2429 return false;
2430
2431 return true;
2432 }