]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
Merge pull request #1851 from brauner/2017-10-10/setsid_for_mini_init
[mirror_lxc.git] / src / lxc / utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config.h"
25
26 #define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
27 #include <ctype.h>
28 #include <dirent.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <grp.h>
32 #include <inttypes.h>
33 #include <libgen.h>
34 #include <stddef.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <sys/mman.h>
40 #include <sys/mount.h>
41 #include <sys/param.h>
42 #include <sys/prctl.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <sys/wait.h>
46
47 #include "log.h"
48 #include "lxclock.h"
49 #include "namespace.h"
50 #include "utils.h"
51
52 #ifndef O_PATH
53 #define O_PATH 010000000
54 #endif
55
56 #ifndef O_NOFOLLOW
57 #define O_NOFOLLOW 00400000
58 #endif
59
60 lxc_log_define(lxc_utils, lxc);
61
62 /*
63 * if path is btrfs, tries to remove it and any subvolumes beneath it
64 */
65 extern bool btrfs_try_remove_subvol(const char *path);
66
/*
 * Delete the contents of @dirname and then @dirname itself.
 *
 * @dirname: directory to remove
 * @pdev:    device number compared against each entry's st_dev when @onedev
 *           is set
 * @exclude: name of a top-level (@level == 0) entry that gets special
 *           treatment: it is removed only if rmdir() succeeds on it or it is
 *           not a directory; may be NULL
 * @level:   recursion depth, 0 for the outermost call
 * @onedev:  when true, entries whose st_dev differs from @pdev are not
 *           descended into; btrfs_try_remove_subvol() is tried on them instead
 *
 * Returns 0 if no step failed, -1 otherwise.
 */
static int _recursive_rmdir(char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	char entry_path[MAXPATHLEN];
	struct dirent *entry;
	struct stat sb;
	bool kept_exclude = false;
	int had_error = 0;
	int len;
	DIR *dirp;

	dirp = opendir(dirname);
	if (!dirp) {
		ERROR("failed to open %s", dirname);
		return -1;
	}

	for (entry = readdir(dirp); entry; entry = readdir(dirp)) {
		const char *name = entry->d_name;

		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
			continue;

		len = snprintf(entry_path, MAXPATHLEN, "%s/%s", dirname, name);
		if (len < 0 || len >= MAXPATHLEN) {
			ERROR("pathname too long");
			had_error = 1;
			continue;
		}

		/* The excluded entry is only touched at the top level. */
		if (level == 0 && exclude && strcmp(name, exclude) == 0) {
			if (rmdir(entry_path) < 0) {
				if (errno == ENOTEMPTY) {
					INFO("Not deleting snapshot %s", entry_path);
					kept_exclude = true;
				} else if (errno == ENOTDIR) {
					if (unlink(entry_path))
						INFO("Failed to remove %s", entry_path);
				} else {
					SYSERROR("Failed to rmdir %s", entry_path);
					had_error = 1;
				}
			}
			continue;
		}

		if (lstat(entry_path, &sb)) {
			ERROR("Failed to stat %s", entry_path);
			had_error = 1;
			continue;
		}

		if (onedev && sb.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(entry_path))
				INFO("Removed btrfs subvolume at %s\n", entry_path);
			continue;
		}

		if (!S_ISDIR(sb.st_mode)) {
			if (unlink(entry_path) < 0) {
				SYSERROR("Failed to delete %s", entry_path);
				had_error = 1;
			}
			continue;
		}

		if (_recursive_rmdir(entry_path, pdev, exclude, level + 1, onedev) < 0)
			had_error = 1;
	}

	/* A directory that still holds the excluded entry is allowed to stay. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !kept_exclude) {
		ERROR("Failed to delete %s", dirname);
		had_error = 1;
	}

	if (closedir(dirp)) {
		ERROR("Failed to close directory %s", dirname);
		had_error = 1;
	}

	return had_error ? -1 : 0;
}
159
160 /* We have two different magic values for overlayfs, yay. */
161 #ifndef OVERLAYFS_SUPER_MAGIC
162 #define OVERLAYFS_SUPER_MAGIC 0x794c764f
163 #endif
164
165 #ifndef OVERLAY_SUPER_MAGIC
166 #define OVERLAY_SUPER_MAGIC 0x794c7630
167 #endif
168
169 /* In overlayfs, st_dev is unreliable. So on overlayfs we don't do the
170 * lxc_rmdir_onedev()
171 */
172 static bool is_native_overlayfs(const char *path)
173 {
174 if (has_fs_type(path, OVERLAY_SUPER_MAGIC) ||
175 has_fs_type(path, OVERLAYFS_SUPER_MAGIC))
176 return true;
177
178 return false;
179 }
180
/*
 * Recursively remove @path, treating the top-level entry named @exclude
 * specially (see _recursive_rmdir()). The one-device restriction is switched
 * off when @path is on overlayfs.
 *
 * Returns 0 on success or when @path does not exist, -1 if there were any
 * failures.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat sb;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &sb) >= 0)
		return _recursive_rmdir(path, sb.st_dev, exclude, 0, onedev);

	if (errno == ENOENT)
		return 0;

	ERROR("Failed to stat %s", path);
	return -1;
}
200
/*
 * Parse @arg as an unsigned 16-bit integer in the given @base (borrowed from
 * iproute2).
 *
 * @val:  receives the parsed value on success; untouched on failure
 * @arg:  string to parse; NULL or empty is rejected
 * @base: numeric base handed to strtoul()
 *
 * Returns 0 on success, -1 if @arg is missing, has trailing characters, does
 * not fit in 16 bits, or strtoul() reported an error.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	/* Initialised so it is never read indeterminate if strtoul() bails out
	 * (e.g. on an invalid base) before storing the end pointer. */
	char *end = NULL;
	unsigned long parsed;

	if (!arg || !*arg)
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);
	if (errno != 0)
		return -1;

	if (!end || end == arg || *end != '\0')
		return -1;

	if (parsed > 0xFFFF)
		return -1;

	*val = (unsigned short)parsed;

	return 0;
}
219
/*
 * Create @dir and every missing leading component, each with @mode.
 *
 * The path is walked component by component; on each step the prefix that
 * ends just before the current component is created, and the final step
 * creates the whole path. Components that already exist (EEXIST) are skipped.
 *
 * Returns 0 on success, -1 if a mkdir() fails for another reason or the
 * temporary prefix copy cannot be allocated.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		makeme = strndup(orig, dir - orig);
		if (!makeme)
			return -1;

		/* An empty prefix happens for relative paths on the first step. */
		if (*makeme && mkdir(makeme, mode) && errno != EEXIST) {
			SYSERROR("failed to create directory '%s'", makeme);
			free(makeme);
			return -1;
		}

		free(makeme);
	} while (tmp != dir);

	return 0;
}
242
243 char *get_rundir()
244 {
245 char *rundir;
246 const char *homedir;
247
248 if (geteuid() == 0) {
249 rundir = strdup(RUNTIME_PATH);
250 return rundir;
251 }
252
253 rundir = getenv("XDG_RUNTIME_DIR");
254 if (rundir) {
255 rundir = strdup(rundir);
256 return rundir;
257 }
258
259 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
260 homedir = getenv("HOME");
261 if (!homedir) {
262 ERROR("HOME isn't set in the environment.");
263 return NULL;
264 }
265
266 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
267 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
268
269 return rundir;
270 }
271
/*
 * Wait for child @pid to terminate, restarting waitpid() when it is
 * interrupted (EINTR) or returns a different pid.
 *
 * Returns 0 only if the child exited normally with status 0; -1 if waitpid()
 * failed or the child exited abnormally or with a non-zero status.
 */
int wait_for_pid(pid_t pid)
{
	int wstatus;
	pid_t reaped;

	for (;;) {
		reaped = waitpid(pid, &wstatus, 0);
		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			break;
	}

	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
		return 0;

	return -1;
}
289
/*
 * Wait for child @pid to terminate, restarting waitpid() when it is
 * interrupted (EINTR) or returns a different pid.
 *
 * Returns the raw wait status as filled in by waitpid(), or -1 if waitpid()
 * failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int wstatus;
	pid_t reaped;

	for (;;) {
		reaped = waitpid(pid, &wstatus, 0);
		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			return wstatus;
	}
}
305
/*
 * write() wrapper that retries when the call is interrupted by a signal.
 *
 * Returns whatever the last write() returned: the number of bytes written
 * (possibly fewer than @count) or -1 with errno set.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
315
/*
 * read() wrapper that retries when the call is interrupted by a signal.
 *
 * Returns whatever the last read() returned: the number of bytes read,
 * 0 at end of file, or -1 with errno set.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t nread;

	do {
		nread = read(fd, buf, count);
	} while (nread < 0 && errno == EINTR);

	return nread;
}
325
/*
 * Read exactly @count bytes from @fd into @buf and, when @expected_buf is
 * given, require them to match it byte for byte.
 *
 * Returns the value of lxc_read_nointr() when it is <= 0 (EOF or error),
 * -1 on a short read, -1 with errno set to EINVAL on a content mismatch,
 * and @count on success.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t nread = lxc_read_nointr(fd, buf, count);

	if (nread <= 0)
		return nread;

	if ((size_t)nread != count)
		return -1;

	if (!expected_buf || memcmp(buf, expected_buf, count) == 0)
		return nread;

	errno = EINVAL;
	return -1;
}
340
341 #if HAVE_LIBGNUTLS
342 #include <gnutls/gnutls.h>
343 #include <gnutls/crypto.h>
344
/*
 * Marked as a constructor so that gnutls_global_init() is called
 * automatically, before sha1sum_file() can be used.
 */
__attribute__((constructor))
static void gnutls_lxc_init(void)
{
	gnutls_global_init();
}
350
351 int sha1sum_file(char *fnam, unsigned char *digest)
352 {
353 char *buf;
354 int ret;
355 FILE *f;
356 long flen;
357
358 if (!fnam)
359 return -1;
360 f = fopen_cloexec(fnam, "r");
361 if (!f) {
362 SYSERROR("Error opening template");
363 return -1;
364 }
365 if (fseek(f, 0, SEEK_END) < 0) {
366 SYSERROR("Error seeking to end of template");
367 fclose(f);
368 return -1;
369 }
370 if ((flen = ftell(f)) < 0) {
371 SYSERROR("Error telling size of template");
372 fclose(f);
373 return -1;
374 }
375 if (fseek(f, 0, SEEK_SET) < 0) {
376 SYSERROR("Error seeking to start of template");
377 fclose(f);
378 return -1;
379 }
380 if ((buf = malloc(flen+1)) == NULL) {
381 SYSERROR("Out of memory");
382 fclose(f);
383 return -1;
384 }
385 if (fread(buf, 1, flen, f) != flen) {
386 SYSERROR("Failure reading template");
387 free(buf);
388 fclose(f);
389 return -1;
390 }
391 if (fclose(f) < 0) {
392 SYSERROR("Failre closing template");
393 free(buf);
394 return -1;
395 }
396 buf[flen] = '\0';
397 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
398 free(buf);
399 return ret;
400 }
401 #endif
402
/*
 * Turn a NULL-terminated list of char* varargs into a NULL-terminated argv
 * array.
 *
 * @ap:        argument list; consumed up to and including its NULL terminator
 * @skip:      number of leading slots to leave NULL for the caller to fill
 * @do_strdup: when non-zero each argument is strdup()ed, otherwise the
 *             original pointers are stored
 *
 * Returns a calloc()ed array of skip + nargs + 1 entries, or NULL on
 * allocation failure. On failure nothing is leaked: strings already
 * duplicated are freed together with the array.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	size_t i;
	char **result;

	/* First pass on a copy: size the array so it is allocated only once. */
	va_copy(ap2, ap);
	while (va_arg(ap2, char *))
		count++;
	va_end(ap2);

	result = calloc(count, sizeof(char *));
	if (!result)
		return NULL;

	for (i = skip; ; i++) {
		char *arg = va_arg(ap, char *);

		if (!arg)
			break;

		if (do_strdup) {
			arg = strdup(arg);
			if (!arg)
				goto oom;
		}

		result[i] = arg;
	}

	/* calloc has already set the last element to NULL. */
	return result;

oom:
	/* Only reached when do_strdup is set, so result[skip..i) are ours. */
	while (i > skip)
		free(result[--i]);
	free(result);
	return NULL;
}
442
/*
 * Same as lxc_va_arg_list_to_argv() without duplicating the strings; the
 * result is returned as an array of const pointers.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	char **argv = lxc_va_arg_list_to_argv(ap, skip, 0);

	return (const char **)argv;
}
447
448 struct lxc_popen_FILE *lxc_popen(const char *command)
449 {
450 int ret;
451 int pipe_fds[2];
452 pid_t child_pid;
453 struct lxc_popen_FILE *fp = NULL;
454
455 ret = pipe2(pipe_fds, O_CLOEXEC);
456 if (ret < 0)
457 return NULL;
458
459 child_pid = fork();
460 if (child_pid < 0)
461 goto on_error;
462
463 if (!child_pid) {
464 sigset_t mask;
465
466 close(pipe_fds[0]);
467
468 /* duplicate stdout */
469 if (pipe_fds[1] != STDOUT_FILENO)
470 ret = dup2(pipe_fds[1], STDOUT_FILENO);
471 else
472 ret = fcntl(pipe_fds[1], F_SETFD, 0);
473 if (ret < 0) {
474 close(pipe_fds[1]);
475 exit(EXIT_FAILURE);
476 }
477
478 /* duplicate stderr */
479 if (pipe_fds[1] != STDERR_FILENO)
480 ret = dup2(pipe_fds[1], STDERR_FILENO);
481 else
482 ret = fcntl(pipe_fds[1], F_SETFD, 0);
483 close(pipe_fds[1]);
484 if (ret < 0)
485 exit(EXIT_FAILURE);
486
487 /* unblock all signals */
488 ret = sigfillset(&mask);
489 if (ret < 0)
490 exit(EXIT_FAILURE);
491
492 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
493 if (ret < 0)
494 exit(EXIT_FAILURE);
495
496 execl("/bin/sh", "sh", "-c", command, (char *)NULL);
497 exit(127);
498 }
499
500 close(pipe_fds[1]);
501 pipe_fds[1] = -1;
502
503 fp = malloc(sizeof(*fp));
504 if (!fp)
505 goto on_error;
506
507 fp->child_pid = child_pid;
508 fp->pipe = pipe_fds[0];
509
510 fp->f = fdopen(pipe_fds[0], "r");
511 if (!fp->f)
512 goto on_error;
513
514 return fp;
515
516 on_error:
517 if (fp)
518 free(fp);
519
520 if (pipe_fds[0] >= 0)
521 close(pipe_fds[0]);
522
523 if (pipe_fds[1] >= 0)
524 close(pipe_fds[1]);
525
526 return NULL;
527 }
528
529 int lxc_pclose(struct lxc_popen_FILE *fp)
530 {
531 pid_t wait_pid;
532 int wstatus = 0;
533
534 if (!fp)
535 return -1;
536
537 do {
538 wait_pid = waitpid(fp->child_pid, &wstatus, 0);
539 } while (wait_pid < 0 && errno == EINTR);
540
541 close(fp->pipe);
542 fclose(fp->f);
543 free(fp);
544
545 if (wait_pid < 0)
546 return -1;
547
548 return wstatus;
549 }
550
/*
 * Return a newly allocated copy of @haystack in which every non-overlapping
 * occurrence of @needle (scanning left to right) is replaced by @replacement.
 *
 * An empty @needle matches nothing, so a plain copy of @haystack is returned
 * (previously this case never terminated).
 *
 * Returns NULL on allocation failure or if the result length would overflow.
 * The caller must free() the result.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	const size_t needle_len = strlen(needle);
	const size_t replacement_len = strlen(replacement);
	const size_t haystack_len = strlen(haystack);
	const char *cursor, *hit;
	size_t hits = 0;
	size_t result_len, tail_len;
	char *result, *out;

	/* Pass 1: count the occurrences so the result can be sized exactly. */
	if (needle_len > 0) {
		for (cursor = haystack; (hit = strstr(cursor, needle)); cursor = hit + needle_len)
			hits++;
	}

	result_len = haystack_len - hits * needle_len;
	if (replacement_len > 0 && hits > (SIZE_MAX - 1 - result_len) / replacement_len)
		return NULL;
	result_len += hits * replacement_len;

	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Pass 2: copy the text between matches, substituting as we go. */
	out = result;
	cursor = haystack;
	for (; hits > 0; hits--) {
		size_t gap;

		/* Same scan as pass 1, so a match is guaranteed here. */
		hit = strstr(cursor, needle);
		gap = (size_t)(hit - cursor);

		memcpy(out, cursor, gap);
		out += gap;
		memcpy(out, replacement, replacement_len);
		out += replacement_len;

		cursor = hit + needle_len;
	}

	tail_len = haystack_len - (size_t)(cursor - haystack);
	memcpy(out, cursor, tail_len);
	out[tail_len] = '\0';

	return result;
}
605
/*
 * Report whether @needle equals one of the strings in the NULL-terminated
 * array @haystack. A NULL @haystack contains nothing.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	if (!haystack)
		return false;

	while (*haystack) {
		if (strcmp(needle, *haystack) == 0)
			return true;
		haystack++;
	}

	return false;
}
613
/*
 * Concatenate the strings of the NULL-terminated array @parts, inserting
 * @sep between consecutive elements.
 *
 * @use_as_prefix: when true the result additionally starts with @sep
 *
 * Returns a newly allocated string (to be free()d by the caller) or NULL on
 * allocation failure. The output is written through a moving cursor, so the
 * work is linear in the result length.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **part;
	const size_t sep_len = strlen(sep);
	size_t total = use_as_prefix ? sep_len : 0;
	char *result, *out;

	/* calculate new string length */
	for (part = parts; *part; part++) {
		if (part != parts)
			total += sep_len;
		total += strlen(*part);
	}

	result = malloc(total + 1);
	if (!result)
		return NULL;

	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}

	for (part = parts; *part; part++) {
		size_t part_len = strlen(*part);

		if (part != parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}

		memcpy(out, *part, part_len);
		out += part_len;
	}
	*out = '\0';

	return result;
}
639
/*
 * Split @path on '/' and resolve "." and ".." components.
 *
 * "." is dropped; ".." drops itself and the preceding component, or only
 * itself when it is the first component.
 *
 * Returns the NULL-terminated array of remaining components (as allocated by
 * lxc_string_split()), or NULL if splitting failed.
 */
char **lxc_normalize_path(const char *path)
{
	char **parts;
	size_t count = 0;
	size_t idx = 0;

	parts = lxc_string_split(path, '/');
	if (!parts)
		return NULL;

	while (parts[count])
		count++;

	while (idx < count) {
		bool is_dot = strcmp(parts[idx], ".") == 0;
		bool is_dotdot = strcmp(parts[idx], "..") == 0;

		if (is_dot || (is_dotdot && idx == 0)) {
			/* Drop this element; the move also carries the NULL
			 * terminator along. */
			free(parts[idx]);
			memmove(&parts[idx], &parts[idx + 1],
				sizeof(char *) * (count - idx));
			count--;
			continue;
		}

		if (is_dotdot) {
			/* Drop this element and the one before it. */
			free(parts[idx - 1]);
			free(parts[idx]);
			memmove(&parts[idx - 1], &parts[idx + 1],
				sizeof(char *) * (count - idx));
			count -= 2;
			idx--;
			continue;
		}

		idx++;
	}

	return parts;
}
674
/*
 * Return a newly allocated, normalized version of @path: the components left
 * by lxc_normalize_path() joined with single '/' characters, with a leading
 * '/' when @path started with one.
 *
 * Returns NULL on allocation failure.
 */
char *lxc_deslashify(const char *path)
{
	char *copy, *joined;
	char **parts;
	size_t len;

	copy = strdup(path);
	if (!copy)
		return NULL;

	parts = lxc_normalize_path(copy);
	if (!parts) {
		free(copy);
		return NULL;
	}

	/* No components left, e.g. path == "///" or path == "". */
	if (!parts[0]) {
		len = strlen(copy);
		if (len == 0) {
			lxc_free_array((void **)parts, free);
			return copy;
		}

		/*
		 * NOTE(review): strcspn() equals len only when the path holds
		 * no '/' at all (e.g. "."), so "///" is handled by the join
		 * below rather than by this branch - confirm whether strspn()
		 * was intended.
		 */
		if (strcspn(copy, "/") == len) {
			free(copy);
			lxc_free_array((void **)parts, free);
			return strdup("/");
		}
	}

	joined = lxc_string_join("/", (const char **)parts, *copy == '/');
	free(copy);
	lxc_free_array((void **)parts, free);
	return joined;
}
716
/*
 * Return a newly allocated string "@first/@second". The '/' is inserted only
 * when @second does not already begin with one; @first is used unchanged.
 *
 * Returns NULL on allocation failure.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	const char *glue = (second[0] == '/') ? "" : "/";
	size_t size = strlen(first) + strlen(glue) + strlen(second) + 1;
	char *joined;

	joined = calloc(1, size);
	if (!joined)
		return NULL;

	snprintf(joined, size, "%s%s%s", first, glue, second);
	return joined;
}
735
/*
 * Report whether @needle is one of the @_sep-separated tokens of @haystack.
 * Tokens are produced by strtok_r(), so empty tokens are never seen.
 * Returns false when either string is NULL.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	char delim[2] = { _sep, '\0' };
	char *copy, *tok, *state = NULL;

	if (!haystack || !needle)
		return false;

	/* strtok_r() modifies its input, so tokenize a stack copy. */
	copy = alloca(strlen(haystack) + 1);
	strcpy(copy, haystack);

	tok = strtok_r(copy, delim, &state);
	while (tok) {
		if (strcmp(needle, tok) == 0)
			return true;
		tok = strtok_r(NULL, delim, &state);
	}

	return false;
}
753
/*
 * Split @string at every @_sep into a NULL-terminated array of newly
 * allocated strings. Tokenizing is done with strtok_r(), so empty tokens are
 * not produced.
 *
 * A NULL @string yields an empty (NULL-only) array. Returns NULL on
 * allocation failure with errno preserved from the failing call.
 */
char **lxc_string_split(const char *string, char _sep)
{
	char delim[2] = {_sep, '\0'};
	char **list = NULL, **shrunk;
	size_t capacity = 0;
	size_t count = 0;
	char *copy, *tok, *state = NULL;
	int saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so tokenize a stack copy. */
	copy = alloca(strlen(string) + 1);
	strcpy(copy, string);

	for (tok = strtok_r(copy, delim, &state); tok;
	     tok = strtok_r(NULL, delim, &state)) {
		if (lxc_grow_array((void ***)&list, &capacity, count + 1, 16) < 0)
			goto on_error;

		list[count] = strdup(tok);
		if (!list[count])
			goto on_error;

		count++;
	}

	/* Give back whatever capacity was not needed. */
	shrunk = realloc(list, (count + 1) * sizeof(char *));
	if (!shrunk)
		goto on_error;

	/* With zero tokens nothing ever initialised the terminator slot. */
	if (count == 0)
		shrunk[0] = NULL;

	return shrunk;

on_error:
	saved_errno = errno;
	lxc_free_array((void **)list, free);
	errno = saved_errno;
	return NULL;
}
793
/*
 * Append a copy of the characters in [@start, @end) to the string array
 * *@result, growing it as needed. @cap and @cnt track the array's capacity
 * and element count; *@cnt is advanced on success.
 *
 * Returns false if growing the array or copying the word failed.
 */
static bool complete_word(char ***result, char *start, char *end, size_t *cap, size_t *cnt)
{
	char *word;

	if (lxc_grow_array((void ***)result, cap, 2 + *cnt, 16) < 0)
		return false;

	word = strndup(start, end - start);
	(*result)[*cnt] = word;
	if (!word)
		return false;

	*cnt += 1;
	return true;
}
808
/*
 * Given a string 'one two "three four"', split it into three words:
 * one, two, and three four (the surrounding quotes are dropped).
 *
 * Words are separated by whitespace; a word that begins with ' or " runs up
 * to the next matching quote. Escaped quotes are not handled, and a quoted
 * word whose closing quote is missing is dropped.
 *
 * Returns a NULL-terminated array of newly allocated strings (empty for a
 * NULL, empty or all-whitespace @string), or NULL on allocation failure with
 * errno preserved from the failing call.
 */
char **lxc_string_split_quoted(char *string)
{
	char *nextword = string, *p, state;
	char **result = NULL, **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	bool ok = true;
	int saved_errno;

	if (!string || !*string)
		return calloc(1, sizeof(char *));

	/*
	 * state is ' ' between words, 'a' inside an unquoted word, and the
	 * quote character itself inside a quoted word.
	 */
	state = ' ';
	for (p = string; *p && ok; p++) {
		switch (state) {
		case ' ':
			/* The cast keeps isspace() defined for negative chars. */
			if (isspace((unsigned char)*p))
				break;
			nextword = p;
			state = (*p == '"' || *p == '\'') ? *p : 'a';
			break;
		case 'a':
			if (isspace((unsigned char)*p)) {
				ok = complete_word(&result, nextword, p,
						   &result_capacity, &result_count);
				state = ' ';
			}
			break;
		case '"':
		case '\'':
			if (*p == state) {
				ok = complete_word(&result, nextword + 1, p,
						   &result_capacity, &result_count);
				state = ' ';
			}
			break;
		}
	}

	/* Input ended inside an unquoted word; p is at the terminator. */
	if (ok && state == 'a')
		ok = complete_word(&result, nextword, p,
				   &result_capacity, &result_count);

	if (!ok)
		goto on_error;

	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	if (!shrunk)
		goto on_error;

	/* With zero words nothing ever initialised the terminator slot. */
	if (result_count == 0)
		shrunk[0] = NULL;

	return shrunk;

on_error:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
861
/*
 * Split @string at every @_sep like lxc_string_split(), additionally
 * stripping leading and trailing spaces and tabs from each token. A token
 * that consists only of blanks is kept as an empty string.
 *
 * A NULL @string yields an empty (NULL-only) array. Returns NULL on
 * allocation failure with errno preserved from the failing call.
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char *token, *str, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **result = NULL, **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;
	size_t i = 0;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so tokenize a stack copy. */
	str = alloca(strlen(string) + 1);
	strcpy(str, string);
	for (; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		while (token[0] == ' ' || token[0] == '\t')
			token++;

		i = strlen(token);
		while (i > 0 && (token[i - 1] == ' ' || token[i - 1] == '\t')) {
			token[i - 1] = '\0';
			i--;
		}

		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;

		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;

		result_count++;
	}

	/* if we allocated too much, reduce it */
	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	if (!shrunk)
		goto error_out;

	/* With zero tokens nothing ever initialised the terminator slot. */
	if (result_count == 0)
		shrunk[0] = NULL;

	return shrunk;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
902
903 void lxc_free_array(void **array, lxc_free_fn element_free_fn)
904 {
905 void **p;
906 for (p = array; p && *p; p++)
907 element_free_fn(*p);
908 free((void*)array);
909 }
910
/*
 * Make sure *@array has room for @new_size elements plus a NULL terminator.
 *
 * @array:              pointer to the array, may point to NULL
 * @capacity:           current number of slots; updated when the array grows
 * @new_size:           number of elements the caller wants to store
 * @capacity_increment: step by which the capacity is enlarged
 *
 * Newly added slots are zeroed. If only one of *@array / *@capacity is set,
 * both are reset first (an inconsistent non-NULL *@array is not freed).
 *
 * Returns 0 when the array is large enough, -1 with errno set if growth is
 * needed but @capacity_increment is 0 (EINVAL), the size would overflow
 * (ENOMEM), or realloc() fails. On failure *@array and *@capacity keep the
 * values they had after the consistency reset.
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	const size_t max_slots = SIZE_MAX / sizeof(void *);
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	if (new_size >= max_slots) {
		errno = ENOMEM;
		return -1;
	}

	new_capacity = *capacity;
	while (new_capacity < new_size + 1) {
		/* A zero step would never reach the requested size. */
		if (capacity_increment == 0) {
			errno = EINVAL;
			return -1;
		}
		if (capacity_increment > max_slots - new_capacity) {
			errno = ENOMEM;
			return -1;
		}
		new_capacity += capacity_increment;
	}

	if (new_capacity != *capacity) {
		/* we have to reallocate */
		new_array = realloc(*array, new_capacity * sizeof(void *));
		if (!new_array)
			return -1;

		memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
		*array = new_array;
		*capacity = new_capacity;
	}

	/* array has sufficient elements */
	return 0;
}
939
/*
 * Count the elements of the NULL-terminated @array, not including the
 * terminator. A NULL @array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t len = 0;

	if (!array)
		return 0;

	while (array[len])
		len++;

	return len;
}
950
/*
 * Replace the contents of @filename with the @count bytes at @buf, creating
 * the file (mode 0666) if needed and optionally appending a newline.
 *
 * Returns 0 on success; -1 if the file cannot be opened or any write is
 * short or fails, with errno preserved across the close().
 */
int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
{
	int fd, saved_errno;
	ssize_t written;

	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
	if (fd < 0)
		return -1;

	written = lxc_write_nointr(fd, buf, count);
	if (written < 0 || (size_t)written != count)
		goto on_error;

	if (add_newline && lxc_write_nointr(fd, "\n", 1) != 1)
		goto on_error;

	close(fd);
	return 0;

on_error:
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
978
/*
 * Read from @filename.
 *
 * With a non-NULL @buf and non-zero @count, @buf is zeroed and a single
 * read() of up to @count bytes is done into it. Otherwise the file is read
 * to the end into a scratch buffer and only the number of bytes is reported.
 *
 * Returns the number of bytes read (or counted), or a negative value if the
 * file cannot be opened or read() fails. errno is preserved across close().
 */
int lxc_read_from_file(const char *filename, void* buf, size_t count)
{
	int fd, saved_errno;
	ssize_t nread;

	fd = open(filename, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -1;

	if (buf && count) {
		memset(buf, 0, count);
		nread = read(fd, buf, count);
	} else {
		char scratch[100];
		size_t total = 0;

		for (;;) {
			nread = read(fd, scratch, 100);
			if (nread <= 0)
				break;
			total += nread;
		}

		/* A clean EOF ends the loop with 0: report the byte count. */
		if (nread == 0)
			nread = total;
	}

	if (nread < 0)
		ERROR("read %s: %s", filename, strerror(errno));

	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return nread;
}
1008
/*
 * Grow @array, which holds @count elements, by one slot and store NULL in it.
 *
 * With @count == 0 the array is returned unchanged. If the reallocation
 * fails, all @count elements and the array itself are freed and NULL is
 * returned.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **grown;
	size_t idx;

	if (count == 0)
		return array;

	grown = realloc(array, (count + 1) * sizeof(*array));
	if (grown) {
		grown[count] = NULL;
		return grown;
	}

	for (idx = 0; idx < count; idx++)
		free(array[idx]);
	free(array);

	return NULL;
}
1028
/*
 * Produce a seed for rand(): taken from /dev/urandom when that can be read,
 * otherwise the time + pid value the seed starts out with.
 *
 * @srand_it: when true, srand() is called with the seed
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom;

	urandom = fopen("/dev/urandom", "r");
	if (urandom) {
		if (fread(&seed, sizeof(seed), 1, urandom) != 1)
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
1050
/*
 * Translate the uid @orig through /proc/self/uid_map.
 *
 * Each line is parsed as three numbers "nsid hostid range"; the first line
 * whose [hostid, hostid + range) interval contains @orig determines the
 * result nsid + (orig - hostid).
 *
 * Returns 0 if the map cannot be opened or no line matches.
 */
uid_t get_ns_uid(uid_t orig)
{
	uid_t nsid, hostid, range;
	uid_t mapped = 0;
	char *line = NULL;
	size_t line_cap = 0;
	FILE *map;

	map = fopen("/proc/self/uid_map", "r");
	if (!map)
		return 0;

	while (getline(&line, &line_cap, map) != -1) {
		if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
			continue;

		if (hostid <= orig && hostid + range > orig) {
			mapped = nsid + (orig - hostid);
			break;
		}
	}

	fclose(map);
	free(line);
	return mapped;
}
1075
/*
 * Report whether @path can be stat()ed and is a directory. Any stat()
 * failure, whatever its cause, counts as "no".
 */
bool dir_exists(const char *path)
{
	struct stat st;

	if (stat(path, &st) < 0)
		return false;

	return S_ISDIR(st.st_mode);
}
1087
/*
 * 64-bit FNV-1a style hash over @len bytes of @buf, continuing from @hval.
 *
 * SHA-1 is not used here because that would depend on HAVE_GNUTLS. FNV has
 * good anti-collision properties, and pre-image resistance or one-way-ness
 * are no concern: the goal is only to make a name unique in the 108 bytes of
 * space available.
 *
 * Returns the updated hash value (@hval itself when @len is 0).
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t idx;

	for (idx = 0; idx < len; idx++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[idx];

		/* multiply by the 64 bit FNV magic prime mod 2^64
		 * (1 + 2^1 + 2^4 + 2^5 + 2^7 + 2^8 + 2^40)
		 */
		hval *= 0x100000001b3ULL;
	}

	return hval;
}
1111
1112 /*
1113 * Detect whether / is mounted MS_SHARED. The only way I know of to
1114 * check that is through /proc/self/mountinfo.
1115 * I'm only checking for /. If the container rootfs or mount location
1116 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1117 * out would be too much work to be worth it.
1118 */
1119 int detect_shared_rootfs(void)
1120 {
1121 char buf[LXC_LINELEN], *p;
1122 FILE *f;
1123 int i;
1124 char *p2;
1125
1126 f = fopen("/proc/self/mountinfo", "r");
1127 if (!f)
1128 return 0;
1129 while (fgets(buf, LXC_LINELEN, f)) {
1130 for (p = buf, i = 0; p && i < 4; i++)
1131 p = strchr(p + 1, ' ');
1132 if (!p)
1133 continue;
1134 p2 = strchr(p + 1, ' ');
1135 if (!p2)
1136 continue;
1137 *p2 = '\0';
1138 if (strcmp(p + 1, "/") == 0) {
1139 /* This is '/'. Is it shared? */
1140 p = strchr(p2 + 1, ' ');
1141 if (p && strstr(p, "shared:")) {
1142 fclose(f);
1143 return 1;
1144 }
1145 }
1146 }
1147 fclose(f);
1148 return 0;
1149 }
1150
/*
 * Move the calling process into the namespace /proc/@pid/ns/@ns via setns().
 *
 * Returns true on success, false if the path does not fit, cannot be opened,
 * or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns) {
	char nspath[MAXPATHLEN];
	bool switched;
	int len, nsfd;

	/* Switch to new ns */
	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	switched = (setns(nsfd, 0) == 0);
	if (!switched)
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, nsfd);

	close(nsfd);
	return switched;
}
1175
/*
 * Detect whether / is the initial ramfs ("rootfs").
 *
 * Looking at fs/proc_namespace.c, it appears we can actually expect the
 * rootfs entry to very specifically contain
 *	" - rootfs rootfs "
 * IIUC, so long as we've chrooted so that rootfs is not our root,
 * the rootfs entry should always be skipped in mountinfo contents.
 *
 * Returns true if a /proc/self/mountinfo entry whose fifth space-separated
 * field is "/" contains "- rootfs rootfs " starting at its first '-' after
 * that field; false otherwise (including when mountinfo cannot be opened).
 */
bool detect_ramfs_rootfs(void)
{
	char *field, *field_end;
	char *line = NULL;
	size_t line_cap = 0;
	bool is_ramfs = false;
	FILE *mounts;
	int skipped;

	mounts = fopen("/proc/self/mountinfo", "r");
	if (!mounts)
		return false;

	while (getline(&line, &line_cap, mounts) != -1) {
		/* Advance to the space in front of the fifth field. */
		field = line;
		for (skipped = 0; field && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (!field)
			continue;

		field_end = strchr(field + 1, ' ');
		if (!field_end)
			continue;
		*field_end = '\0';

		if (strcmp(field + 1, "/") != 0)
			continue;

		/* This is '/'. Is it the ramfs? */
		field = strchr(field_end + 1, '-');
		if (field && strncmp(field, "- rootfs rootfs ", 16) == 0) {
			is_ramfs = true;
			break;
		}
	}

	free(line);
	fclose(mounts);
	return is_ramfs;
}
1218
/*
 * Look for an executable named @cmd in the directories listed in $PATH.
 * When @rootfs is given, each directory is looked up below @rootfs.
 *
 * Returns a newly allocated path of the first candidate for which
 * access(X_OK) succeeds, or NULL if $PATH is unset, nothing matches, or an
 * allocation fails. Candidates that do not fit in MAXPATHLEN are skipped.
 */
char *on_path(const char *cmd, const char *rootfs) {
	char candidate[MAXPATHLEN];
	char *env_path, *dirs, *dir;
	char *state = NULL;
	char *found = NULL;
	int len;

	env_path = getenv("PATH");
	if (!env_path)
		return NULL;

	/* strtok_r() modifies its input, so tokenize a private copy. */
	dirs = strdup(env_path);
	if (!dirs)
		return NULL;

	for (dir = strtok_r(dirs, ":", &state); dir;
	     dir = strtok_r(NULL, ":", &state)) {
		if (rootfs)
			len = snprintf(candidate, MAXPATHLEN, "%s/%s/%s", rootfs, dir, cmd);
		else
			len = snprintf(candidate, MAXPATHLEN, "%s/%s", dir, cmd);

		if (len < 0 || len >= MAXPATHLEN)
			continue;

		if (access(candidate, X_OK) == 0) {
			found = strdup(candidate);
			break;
		}
	}

	free(dirs);
	return found;
}
1256
/* Report whether stat() succeeds on @f. */
bool file_exists(const char *f)
{
	struct stat st;

	if (stat(f, &st) != 0)
		return false;

	return true;
}
1263
/*
 * Report whether cgroup namespaces are available, judged by the presence of
 * /proc/self/ns/cgroup.
 */
bool cgns_supported(void)
{
	return file_exists("/proc/self/ns/cgroup");
}
1268
1269 /* historically lxc-init has been under /usr/lib/lxc and under
1270 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1271 */
1272 char *choose_init(const char *rootfs)
1273 {
1274 char *retv = NULL;
1275 const char *empty = "",
1276 *tmp;
1277 int ret, env_set = 0;
1278
1279 if (!getenv("PATH")) {
1280 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1281 SYSERROR("Failed to setenv");
1282 env_set = 1;
1283 }
1284
1285 retv = on_path("init.lxc", rootfs);
1286
1287 if (env_set) {
1288 if (unsetenv("PATH"))
1289 SYSERROR("Failed to unsetenv");
1290 }
1291
1292 if (retv)
1293 return retv;
1294
1295 retv = malloc(PATH_MAX);
1296 if (!retv)
1297 return NULL;
1298
1299 if (rootfs)
1300 tmp = rootfs;
1301 else
1302 tmp = empty;
1303
1304 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
1305 if (ret < 0 || ret >= PATH_MAX) {
1306 ERROR("pathname too long");
1307 goto out1;
1308 }
1309 if (access(retv, X_OK) == 0)
1310 return retv;
1311
1312 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
1313 if (ret < 0 || ret >= PATH_MAX) {
1314 ERROR("pathname too long");
1315 goto out1;
1316 }
1317 if (access(retv, X_OK) == 0)
1318 return retv;
1319
1320 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
1321 if (ret < 0 || ret >= PATH_MAX) {
1322 ERROR("pathname too long");
1323 goto out1;
1324 }
1325 if (access(retv, X_OK) == 0)
1326 return retv;
1327
1328 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
1329 if (ret < 0 || ret >= PATH_MAX) {
1330 ERROR("pathname too long");
1331 goto out1;
1332 }
1333 if (access(retv, X_OK) == 0)
1334 return retv;
1335
1336 /*
1337 * Last resort, look for the statically compiled init.lxc which we
1338 * hopefully bind-mounted in.
1339 * If we are called during container setup, and we get to this point,
1340 * then the init.lxc.static from the host will need to be bind-mounted
1341 * in. So we return NULL here to indicate that.
1342 */
1343 if (rootfs)
1344 goto out1;
1345
1346 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
1347 if (ret < 0 || ret >= PATH_MAX) {
1348 WARN("Nonsense - name /lxc.init.static too long");
1349 goto out1;
1350 }
1351 if (access(retv, X_OK) == 0)
1352 return retv;
1353
1354 out1:
1355 free(retv);
1356 return NULL;
1357 }
1358
/*
 * Write the string @content to @file, truncating or creating it.
 *
 * Returns 0 only if the file was opened, the content was written and the
 * stream was closed without error; -1 otherwise. Checking fclose() matters
 * because short content normally sits in the stdio buffer until the stream
 * is flushed, so that is where a failing write first shows up.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int ret = 0;

	f = fopen(file, "w");
	if (!f)
		return -1;

	if (fputs(content, f) == EOF)
		ret = -1;

	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
1372
/* Return 1 if @path can be stat()ed and names a directory, 0 otherwise. */
int is_dir(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) ? 1 : 0;
}
1381
1382 /*
1383 * Given the '-t' template option to lxc-create, figure out what to
1384 * do. If the template is a full executable path, use that. If it
1385 * is something like 'sshd', then return $templatepath/lxc-sshd.
1386 * On success return the template, on error return NULL.
1387 */
1388 char *get_template_path(const char *t)
1389 {
1390 int ret, len;
1391 char *tpath;
1392
1393 if (t[0] == '/' && access(t, X_OK) == 0) {
1394 tpath = strdup(t);
1395 return tpath;
1396 }
1397
1398 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
1399 tpath = malloc(len);
1400 if (!tpath)
1401 return NULL;
1402 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
1403 if (ret < 0 || ret >= len) {
1404 free(tpath);
1405 return NULL;
1406 }
1407 if (access(tpath, X_OK) < 0) {
1408 SYSERROR("bad template: %s", t);
1409 free(tpath);
1410 return NULL;
1411 }
1412
1413 return tpath;
1414 }
1415
/*
 * @path:    a pathname where every '/' has been replaced with '\0'.
 * @offsetp: pointer to an int holding the offset of the segment that was
 *           seen last. Updated on return to the offset of the next segment
 *           (left untouched if it already is at or past @fulllen).
 * @fulllen: length of the original, untokenized path.
 *
 * Returns a pointer to the next path segment, or NULL if there is none.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int offset = *offsetp;

	if (offset >= fulllen)
		return NULL;

	/* Always test the bound before touching path[offset] so the buffer is
	 * never read at or beyond @fulllen.
	 */

	/* Skip what is left of the current segment ... */
	while (offset < fulllen && path[offset] != '\0')
		offset++;

	/* ... and the run of separators following it. */
	while (offset < fulllen && path[offset] == '\0')
		offset++;

	*offsetp = offset;
	return (offset < fulllen) ? &path[offset] : NULL;
}
1438
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * @subdir qualifies if it is identical to @dir or continues @dir at a path
 * component boundary. An empty @dir (@len == 0) never matches.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen;

	/* dir[len - 1] below would index in front of the string. */
	if (len == 0)
		return false;

	subdirlen = strlen(subdir);
	if (subdirlen < len)
		return false;

	if (strncmp(subdir, dir, len) != 0)
		return false;

	/* @dir ends in a separator, so whatever follows lives below it. */
	if (dir[len - 1] == '/')
		return true;

	/* Otherwise the match must stop at a component boundary so that
	 * "/ab" is not taken for a subdir of "/a".
	 */
	return subdir[len] == '/' || subdirlen == len;
}
1457
/*
 * Inspect the open file descriptor @fd.
 *
 * Returns -ENOENT if fstat() fails, -ELOOP if the descriptor refers to a
 * symbolic link and 0 if it is fine to use.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
1472
/*
 * Open the file or directory @nextpath relative to @dirfd, provided that it
 * is not a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns an open file descriptor on success. On failure a negative value is
 * returned and errno describes the problem; errno is ELOOP when @nextpath
 * turned out to be a symbolic link.
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int newfd, ret;

	newfd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
	if (newfd >= 0) /* Was not a symlink, all good. */
		return newfd;

	/* ELOOP means a symlink; every other error except a permission
	 * problem is reported to the caller as is.
	 */
	if (errno != EPERM && errno != EACCES)
		return newfd;

	/* We were denied read access, so retry with O_PATH which does not
	 * need it.
	 */
	newfd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (newfd < 0)
		return newfd;

	/* O_PATH will return an fd for symlinks. We know nextpath wasn't a
	 * symlink at the last openat(), so if the fd is a link now something
	 * fishy is going on.
	 */
	ret = check_symlink(newfd);
	if (ret < 0) {
		close(newfd);
		/* Callers decide based on errno (e.g. ELOOP) how to report
		 * the failure, so make it agree with the value we return.
		 */
		errno = -ret;
		return ret;
	}

	return newfd;
}
1508
/*
 * Open a path intended as a mount target, walking it one component at a time
 * and refusing any component below @prefix_skip that is a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target: path to be opened
 * @prefix_skip: a part of @target in which to ignore symbolic links. This
 * would be the container's rootfs. NULL or "" means "/".
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	char *copy, *sep;
	int fd, pos, target_len;

	target_len = strlen(target);

	/* Make sure prefix_skip makes sense. */
	if (prefix_skip && prefix_skip[0] != '\0') {
		pos = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, pos)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
			      target, prefix_skip);
			return -EINVAL;
		}

		/* get_nextpath() expects its offset to sit on, or before, a
		 * '/' that was turned into '\0'. The prefix is not empty here,
		 * so stepping back by one guarantees that.
		 */
		pos--;
	} else {
		prefix_skip = "/";
		pos = 0;
	}

	/* Work on a private copy that can be split into components. */
	copy = strdup(target);
	if (!copy) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}

	for (sep = strchr(copy, '/'); sep; sep = strchr(sep + 1, '/'))
		*sep = '\0';

	/* Start at the trusted prefix and descend component by component,
	 * always keeping exactly one directory fd open.
	 */
	fd = open(prefix_skip, O_RDONLY);
	while (fd >= 0) {
		char *component;
		int next, open_errno;

		component = get_nextpath(copy, &pos, target_len);
		if (!component)
			break;

		next = open_if_safe(fd, component);
		open_errno = errno;
		close(fd);
		fd = next;

		if (next < 0) {
			/* close() may have changed errno; report the reason
			 * the open failed.
			 */
			errno = open_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);
		}
	}

	free(copy);
	return fd;
}
1584
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * The target (and, for relative bind mounts, the source) is opened through
 * open_without_symlink() and the mount is performed on the corresponding
 * /proc/self/fd/<fd> path.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int destfd, ret, saved_errno;
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	int srcfd = -1;
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("this is a relative bind mount");
		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		/* snprintf() returning the buffer size already means the
		 * output was truncated.
		 */
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			/* Keep the errno of the failed open for the caller. */
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}
		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);
		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);
	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		/* @src is allowed to be NULL; never hand NULL to %s. */
		SYSERROR("Failed to mount %s onto %s", src ? src : "(null)", dest);
		return ret;
	}

	return 0;
}
1652
1653 /*
1654 * Mount a proc under @rootfs if proc self points to a pid other than
1655 * my own. This is needed to have a known-good proc mount for setting
1656 * up LSMs both at container startup and attach.
1657 *
1658 * @rootfs : the rootfs where proc should be mounted
1659 *
1660 * Returns < 0 on failure, 0 if the correct proc was already mounted
1661 * and 1 if a new proc was mounted.
1662 *
1663 * NOTE: not to be called from inside the container namespace!
1664 */
1665 int lxc_mount_proc_if_needed(const char *rootfs)
1666 {
1667 char path[MAXPATHLEN];
1668 int link_to_pid, linklen, mypid, ret;
1669 char link[LXC_NUMSTRLEN64] = {0};
1670
1671 ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
1672 if (ret < 0 || ret >= MAXPATHLEN) {
1673 SYSERROR("proc path name too long");
1674 return -1;
1675 }
1676
1677 linklen = readlink(path, link, LXC_NUMSTRLEN64);
1678
1679 ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
1680 if (ret < 0 || ret >= MAXPATHLEN) {
1681 SYSERROR("proc path name too long");
1682 return -1;
1683 }
1684
1685 /* /proc not mounted */
1686 if (linklen < 0) {
1687 if (mkdir(path, 0755) && errno != EEXIST)
1688 return -1;
1689 goto domount;
1690 } else if (linklen >= LXC_NUMSTRLEN64) {
1691 link[linklen - 1] = '\0';
1692 ERROR("readlink returned truncated content: \"%s\"", link);
1693 return -1;
1694 }
1695
1696 mypid = getpid();
1697 INFO("I am %d, /proc/self points to \"%s\"", mypid, link);
1698
1699 if (lxc_safe_int(link, &link_to_pid) < 0)
1700 return -1;
1701
1702 /* correct procfs is already mounted */
1703 if (link_to_pid == mypid)
1704 return 0;
1705
1706 ret = umount2(path, MNT_DETACH);
1707 if (ret < 0)
1708 WARN("failed to umount \"%s\" with MNT_DETACH", path);
1709
1710 domount:
1711 /* rootfs is NULL */
1712 if (!strcmp(rootfs, ""))
1713 ret = mount("proc", path, "proc", 0, NULL);
1714 else
1715 ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);
1716 if (ret < 0)
1717 return -1;
1718
1719 INFO("mounted /proc in container for security transition");
1720 return 1;
1721 }
1722
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the file descriptor, or the negative result of open() after
 * logging the error.
 */
int open_devnull(void)
{
	int devnull_fd;

	devnull_fd = open("/dev/null", O_RDWR);
	if (devnull_fd >= 0)
		return devnull_fd;

	SYSERROR("Can't open /dev/null");
	return devnull_fd;
}
1732
/*
 * Duplicate @fd onto stdin, stdout and stderr, in that order.
 *
 * Returns 0 on success and -1 if @fd is negative or any dup2() fails.
 */
int set_stdfds(int fd)
{
	static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t i;

	if (fd < 0)
		return -1;

	for (i = 0; i < sizeof(std_fds) / sizeof(std_fds[0]); i++) {
		if (dup2(fd, std_fds[i]) < 0)
			return -1;
	}

	return 0;
}
1754
/*
 * Point stdin, stdout and stderr at /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null cannot be opened.
 */
int null_stdfds(void)
{
	int devnull, status;

	devnull = open_devnull();
	if (devnull < 0)
		return -1;

	status = set_stdfds(devnull);
	/* The standard descriptors hold their own duplicates now. */
	close(devnull);

	return status;
}
1767
/*
 * Count the lines of file @fn by reading it with getline().
 *
 * Returns the number of lines, or -1 if the file cannot be opened.
 */
int lxc_count_file_lines(const char *fn)
{
	FILE *stream;
	char *buf = NULL;
	size_t bufsz = 0;
	int count;

	stream = fopen_cloexec(fn, "r");
	if (!stream)
		return -1;

	/* Each successful getline() call accounts for one line. */
	for (count = 0; getline(&buf, &bufsz, stream) != -1; count++)
		;

	free(buf);
	fclose(stream);
	return count;
}
1789
/*
 * Map @length bytes of @fd starting at @offset so that the mapping can be
 * handed to ordinary string functions.
 *
 * @addr is passed as placement hint to the first mmap(); @prot and @flags
 * are applied to the file mapping.
 *
 * Returns the address of the mapping, or MAP_FAILED on error. Unmap with
 * munmap() over @length + 1 bytes.
 */
void *lxc_strmmap(void *addr, size_t length, int prot, int flags, int fd,
		  off_t offset)
{
	void *anon, *mapped;

	/* Reserve an anonymous, zero filled region that is one byte larger
	 * than the requested file range.
	 */
	anon = mmap(addr, length + 1, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (anon == MAP_FAILED)
		return MAP_FAILED;

	/* Place the file over the start of that region at the very same
	 * address. Everything except the additional byte is replaced by file
	 * content, the remaining \0-byte terminates the data.
	 */
	mapped = mmap(anon, length, prot, MAP_FIXED | flags, fd, offset);
	if (mapped != MAP_FAILED)
		return mapped;

	/* Do not leak the reservation if the file could not be mapped. */
	munmap(anon, length + 1);
	return MAP_FAILED;
}
1811
/*
 * Unmap @length + 1 bytes starting at @addr, i.e. the payload plus one
 * additional byte. Returns the result of munmap().
 */
int lxc_strmunmap(void *addr, size_t length)
{
	const size_t mapped_len = length + 1;

	return munmap(addr, mapped_len);
}
1816
1817 /* Check whether a signal is blocked by a process. */
1818 /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
1819 #define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
1820 bool task_blocking_signal(pid_t pid, int signal)
1821 {
1822 bool bret = false;
1823 char *line = NULL;
1824 long unsigned int sigblk = 0;
1825 size_t n = 0;
1826 int ret;
1827 FILE *f;
1828
1829 char status[__PROC_STATUS_LEN];
1830
1831 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1832 if (ret < 0 || ret >= __PROC_STATUS_LEN)
1833 return bret;
1834
1835 f = fopen(status, "r");
1836 if (!f)
1837 return bret;
1838
1839 while (getline(&line, &n, f) != -1) {
1840 if (strncmp(line, "SigBlk:\t", 8))
1841 continue;
1842
1843 if (sscanf(line + 8, "%lx", &sigblk) != 1)
1844 goto out;
1845 }
1846
1847 if (sigblk & (1LU << (signal - 1)))
1848 bret = true;
1849
1850 out:
1851 free(line);
1852 fclose(f);
1853 return bret;
1854 }
1855
/*
 * Grow the NULL-terminated pointer array *@list by one slot.
 *
 * *@list may be NULL, which stands for an empty list. On success the array
 * has been reallocated, the new slot and the terminator after it are both
 * NULL, and the index of the new slot is returned. On allocation failure -1
 * is returned and *@list is left untouched.
 */
static int lxc_append_null_to_list(void ***list)
{
	int newentry = 0;
	void **tmp;

	/* Find the current terminator. */
	if (*list)
		while ((*list)[newentry])
			newentry++;

	/* One element for the new entry, one for the terminator. */
	tmp = realloc(*list, (newentry + 2) * sizeof(*tmp));
	if (!tmp)
		return -1;

	/* Initialize the new slot as well: for a fresh list it would contain
	 * garbage, and a caller that fails before filling it in would leave a
	 * corrupted list behind.
	 */
	tmp[newentry] = NULL;
	tmp[newentry + 1] = NULL;
	*list = tmp;

	return newentry;
}
1875
/*
 * Append a copy of @entry to the NULL-terminated string array *@list.
 *
 * Returns 0 on success and -1 if memory could not be allocated, in which
 * case the list keeps its previous content.
 */
int lxc_append_string(char ***list, char *entry)
{
	char *copy;
	int newentry;

	/* Duplicate first: if this fails the list has not been touched and
	 * cannot end up with an unfilled slot.
	 */
	copy = strdup(entry);
	if (!copy)
		return -1;

	newentry = lxc_append_null_to_list((void ***)list);
	if (newentry < 0) {
		free(copy);
		return -1;
	}

	(*list)[newentry] = copy;

	return 0;
}
1893
/*
 * Open the namespace file /proc/<pid>/ns/<ns>.
 *
 * When @ns is NULL or the empty string the directory /proc/<pid>/ns itself
 * is opened, which allows callers to check whether the kernel supports
 * namespaces at all.
 *
 * Returns the file descriptor from open(), or -1 if the path does not fit
 * into the buffer or cannot be opened.
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
	int ret;
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char path[__NS_PATH_LEN];
	const char *sep = "";
	const char *name = "";

	/* Only append "/<ns>" when a namespace name was actually given. */
	if (ns && strcmp(ns, "") != 0) {
		sep = "/";
		name = ns;
	}

	ret = snprintf(path, sizeof(path), "/proc/%d/ns%s%s", pid, sep, name);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return -1;

	return open(path, O_RDONLY | O_CLOEXEC);
}
1913
/*
 * Convert the string @numstr to an unsigned int.
 *
 * Leading whitespace is skipped and the base is detected from the prefix
 * (strtoul() with base 0). Negative numbers, empty strings and trailing
 * garbage are rejected.
 *
 * Returns 0 and stores the result in @converted on success, -EINVAL if
 * @numstr is not a valid non-negative number and -ERANGE if the value does
 * not fit into an unsigned int.
 */
int lxc_safe_uint(const char *numstr, unsigned int *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* The <ctype.h> functions require a value representable as unsigned
	 * char; a plain char may be negative.
	 */
	while (isspace((unsigned char)*numstr))
		numstr++;

	/* strtoul() would silently negate the value. */
	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	if (uli > UINT_MAX)
		return -ERANGE;

	*converted = (unsigned int)uli;
	return 0;
}
1939
/*
 * Convert the string @numstr to an unsigned long.
 *
 * Leading whitespace is skipped and the base is detected from the prefix
 * (strtoul() with base 0). Negative numbers, empty strings and trailing
 * garbage are rejected.
 *
 * Returns 0 and stores the result in @converted on success, -EINVAL if
 * @numstr is not a valid non-negative number and -ERANGE if the value does
 * not fit into an unsigned long.
 */
int lxc_safe_ulong(const char *numstr, unsigned long *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* The <ctype.h> functions require a value representable as unsigned
	 * char; a plain char may be negative.
	 */
	while (isspace((unsigned char)*numstr))
		numstr++;

	/* strtoul() would silently negate the value. */
	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	*converted = uli;
	return 0;
}
1962
/*
 * Convert the string @numstr to an int using strtol() with base 0.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if
 * @numstr is empty, not a number or followed by further characters, and
 * -ERANGE if the value cannot be represented as an int.
 */
int lxc_safe_int(const char *numstr, int *converted)
{
	char *end = NULL;
	long val;
	int parse_errno;

	errno = 0;
	val = strtol(numstr, &end, 0);
	parse_errno = errno;

	/* Overflow or underflow of long itself. */
	if (parse_errno == ERANGE && (val == LONG_MAX || val == LONG_MIN))
		return -ERANGE;

	if (parse_errno != 0 && val == 0)
		return -EINVAL;

	/* Nothing was consumed, or something is left over. */
	if (end == numstr || *end != '\0')
		return -EINVAL;

	/* Fits into long but not into int. */
	if (val < INT_MIN || val > INT_MAX)
		return -ERANGE;

	*converted = (int)val;
	return 0;
}
1985
/*
 * Convert the string @numstr to a long using strtol() with base 0.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if
 * @numstr is empty, not a number or followed by further characters, and
 * -ERANGE if the value cannot be represented as a long.
 */
int lxc_safe_long(const char *numstr, long int *converted)
{
	char *end = NULL;
	long val;
	int parse_errno;

	errno = 0;
	val = strtol(numstr, &end, 0);
	parse_errno = errno;

	if (parse_errno == ERANGE) {
		/* strtol() clamps to the limits when the value is out of range. */
		if (val == LONG_MAX || val == LONG_MIN)
			return -ERANGE;
	}

	if (parse_errno != 0 && val == 0)
		return -EINVAL;

	/* Nothing was consumed, or something is left over. */
	if (end == numstr || *end != '\0')
		return -EINVAL;

	*converted = val;
	return 0;
}
2005
/*
 * Switch to group id @gid and then to user id @uid.
 *
 * The group is changed first since the privilege to do so may be gone once
 * the user id has been changed.
 *
 * Returns 0 on success, or the negated errno of the failing setgid() or
 * setuid() call.
 */
int lxc_switch_uid_gid(uid_t uid, gid_t gid)
{
	int saved_errno;

	if (setgid(gid) < 0) {
		/* Logging may modify errno, so remember it before. */
		saved_errno = errno;
		SYSERROR("Failed to switch to gid %d.", (int)gid);
		return -saved_errno;
	}
	NOTICE("Switched to gid %d.", (int)gid);

	if (setuid(uid) < 0) {
		saved_errno = errno;
		SYSERROR("Failed to switch to uid %d.", (int)uid);
		return -saved_errno;
	}
	NOTICE("Switched to uid %d.", (int)uid);

	return 0;
}
2022
/*
 * Simple convenience function which enables uniform logging.
 *
 * Calls setgroups(@size, @list). Returns 0 on success, or the negated errno
 * of the failed call.
 */
int lxc_setgroups(int size, gid_t list[])
{
	if (setgroups(size, list) < 0) {
		/* Logging may modify errno, so remember it before. */
		int saved_errno = errno;

		SYSERROR("Failed to setgroups().");
		return -saved_errno;
	}
	NOTICE("Dropped additional groups.");

	return 0;
}
2034
/*
 * Find an unused loop device by scanning /dev for entries named "loop*".
 *
 * @loop_name: buffer of at least LO_NAME_SIZE bytes that receives the path
 *             of the device that was picked.
 *
 * A device is treated as unused when LOOP_GET_STATUS64 fails with ENXIO.
 * Devices that report a status, i.e. are already set up, and entries that
 * fail in any other way are skipped.
 *
 * Returns an open read-write file descriptor for the device, or -1 if none
 * was found.
 */
static int lxc_get_unused_loop_dev_legacy(char *loop_name)
{
	struct dirent *dp;
	struct loop_info64 lo64;
	DIR *dir;
	int dfd, ret;
	int fd = -1;

	dir = opendir("/dev");
	if (!dir)
		return -1;

	/* The directory fd stays the same for the lifetime of the stream. */
	dfd = dirfd(dir);
	if (dfd < 0) {
		closedir(dir);
		return -1;
	}

	while ((dp = readdir(dir))) {
		if (strncmp(dp->d_name, "loop", 4) != 0)
			continue;

		fd = openat(dfd, dp->d_name, O_RDWR);
		if (fd < 0)
			continue;

		/* Success means the device is in use already. Any failure
		 * other than ENXIO means this is not something we can use
		 * either (e.g. not a loop device at all).
		 */
		ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
		if (ret == 0 || errno != ENXIO) {
			close(fd);
			fd = -1;
			continue;
		}

		ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
		if (ret < 0 || ret >= LO_NAME_SIZE) {
			close(fd);
			fd = -1;
			continue;
		}

		break;
	}

	closedir(dir);

	return fd;
}
2088
/*
 * Ask /dev/loop-control for a free loop device and open it.
 *
 * @name_loop: buffer of at least LO_NAME_SIZE bytes that receives the path
 *             of the device.
 *
 * Returns an open file descriptor for the device, -ENODEV if
 * /dev/loop-control cannot be opened and -1 on any other failure.
 */
static int lxc_get_unused_loop_dev(char *name_loop)
{
	int ctl_fd, free_nr, len;
	int dev_fd = -1;

	ctl_fd = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
	if (ctl_fd < 0)
		return -ENODEV;

	free_nr = ioctl(ctl_fd, LOOP_CTL_GET_FREE);
	if (free_nr >= 0) {
		len = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", free_nr);
		if (len >= 0 && len < LO_NAME_SIZE)
			dev_fd = open(name_loop, O_RDWR | O_CLOEXEC);
	}

	/* The control device is needed for the lookup only. */
	close(ctl_fd);
	return dev_fd;
}
2114
/*
 * Attach the image file @source to an unused loop device.
 *
 * @source:   path of the backing file.
 * @loop_dev: buffer of at least LO_NAME_SIZE bytes that receives the path of
 *            the loop device that was used.
 * @flags:    value for the lo_flags field passed to LOOP_SET_STATUS64.
 *
 * The device is obtained through /dev/loop-control; if that is not available
 * /dev is scanned instead.
 *
 * Returns an open file descriptor for the configured loop device, or -1 on
 * failure.
 */
int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
{
	int ret;
	struct loop_info64 lo64;
	int fd_img = -1, fret = -1, fd_loop = -1;

	fd_loop = lxc_get_unused_loop_dev(loop_dev);
	if (fd_loop == -ENODEV)
		fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev);
	/* Without a device there is nothing to set up or clean up. */
	if (fd_loop < 0)
		return -1;

	fd_img = open(source, O_RDWR | O_CLOEXEC);
	if (fd_img < 0)
		goto on_error;

	ret = ioctl(fd_loop, LOOP_SET_FD, fd_img);
	if (ret < 0)
		goto on_error;

	memset(&lo64, 0, sizeof(lo64));
	lo64.lo_flags = flags;

	ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64);
	if (ret < 0) {
		/* The image is attached already. Detach it again, otherwise
		 * the device stays bound after we report failure.
		 */
		(void)ioctl(fd_loop, LOOP_CLR_FD, 0);
		goto on_error;
	}

	fret = 0;

on_error:
	/* The loop device holds its own reference to the image. */
	if (fd_img >= 0)
		close(fd_img);

	if (fret < 0) {
		close(fd_loop);
		fd_loop = -1;
	}

	return fd_loop;
}
2157
/*
 * Unmount everything that is mounted on top of each other at @path.
 *
 * umount2() is repeated (with MNT_DETACH if @lazy is set) until it fails.
 * A failure with EINVAL ends the loop normally; every other error is treated
 * as fatal so that we cannot end up in an infinite loop. (The alternative
 * would be to constantly parse /proc/self/mountinfo which is yucky and
 * probably racy.)
 *
 * Returns the number of successful umounts (capped at INT_MAX), or the
 * negated errno of a fatal umount2() failure.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	const int umount_flags = lazy ? MNT_DETACH : 0;
	int count = 0;

	/* Every success may have uncovered another mount below, so keep
	 * going until umount2() reports an error.
	 */
	while (umount2(path, umount_flags) >= 0) {
		/* Stop counting instead of overflowing; nobody is going to
		 * care about the exact number at that point.
		 */
		if (count != INT_MAX)
			count++;
	}

	if (errno != EINVAL)
		return -errno;

	return count;
}
2188
/*
 * Run @child_fn(@args) in a forked child and capture its output.
 *
 * @buf, @buf_size: optional buffer for the output. Stdout and stderr of the
 *                  child are redirected into a pipe and a single read() of at
 *                  most @buf_size - 1 bytes is stored in @buf. The last byte
 *                  that was read is replaced by the terminating \0
 *                  (presumably to drop a trailing newline). @buf is the empty
 *                  string if nothing could be read.
 * @child_fn:       function executed in the child; it is expected to exec and
 *                  not to return.
 *
 * Returns -1 if the pipe or the child could not be created, otherwise the
 * result of wait_for_pid() for the child.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	pid_t child;
	int ret, fret, pipefd[2];
	ssize_t bytes;

	/* Make sure our callers do not receive uninitialized memory. */
	if (buf_size > 0 && buf)
		buf[0] = '\0';

	if (pipe(pipefd) < 0) {
		SYSERROR("failed to create pipe");
		return -1;
	}

	child = fork();
	if (child < 0) {
		close(pipefd[0]);
		close(pipefd[1]);
		SYSERROR("failed to create new process");
		return -1;
	}

	if (child == 0) {
		/* Close the read-end of the pipe. */
		close(pipefd[0]);

		/* Redirect std{err,out} to write-end of the
		 * pipe.
		 */
		ret = dup2(pipefd[1], STDOUT_FILENO);
		if (ret >= 0)
			ret = dup2(pipefd[1], STDERR_FILENO);

		/* Close the write-end of the pipe. */
		close(pipefd[1]);

		/* Leave with _exit(): the child inherited the atexit handlers
		 * and unflushed stdio buffers of the parent and must neither
		 * run nor flush them a second time.
		 */
		if (ret < 0) {
			SYSERROR("failed to duplicate std{err,out} file descriptor");
			_exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("failed to exec command");
		_exit(EXIT_FAILURE);
	}

	/* close the write-end of the pipe */
	close(pipefd[1]);

	if (buf && buf_size > 0) {
		/* A signal must not make us lose the output. */
		do {
			bytes = read(pipefd[0], buf, buf_size - 1);
		} while (bytes < 0 && errno == EINTR);

		if (bytes > 0)
			buf[bytes - 1] = '\0';
	}

	fret = wait_for_pid(child);
	/* close the read-end of the pipe */
	close(pipefd[0]);

	return fret;
}
2252
/*
 * Build a path from @first and the following NULL-terminated list of
 * components.
 *
 * A '/' is put in front of every further component that does not start with
 * one. The result is allocated through must_copy_string() and
 * must_realloc().
 */
char *must_make_path(const char *first, ...)
{
	va_list ap;
	const char *part;
	char *path;
	size_t path_len = strlen(first);

	path = must_copy_string(first);

	va_start(ap, first);
	for (part = va_arg(ap, char *); part != NULL; part = va_arg(ap, char *)) {
		size_t part_len = strlen(part);
		bool need_sep = (part[0] != '/');

		/* Old content + optional separator + component + \0 */
		path = must_realloc(path, path_len + (need_sep ? 1 : 0) + part_len + 1);

		if (need_sep)
			path[path_len++] = '/';

		/* Copies the terminating \0 of the component as well. */
		memcpy(path + path_len, part, part_len + 1);
		path_len += part_len;
	}
	va_end(ap);

	return path;
}
2275
/*
 * Duplicate @entry, retrying strdup() for as long as it fails.
 *
 * Returns NULL only if @entry is NULL, otherwise the newly allocated copy.
 */
char *must_copy_string(const char *entry)
{
	char *copy = NULL;

	if (!entry)
		return NULL;

	while (!copy)
		copy = strdup(entry);

	return copy;
}
2288
/*
 * Resize the allocation @orig to @sz bytes, retrying realloc() for as long
 * as it fails.
 *
 * @orig may be NULL, in which case a new block is allocated. A size of 0 is
 * treated as 1 so that the call always returns a valid pointer that can be
 * passed to free() or must_realloc() again.
 */
void *must_realloc(void *orig, size_t sz)
{
	void *ret;

	/* realloc(ptr, 0) is allowed to free @orig and return NULL. The retry
	 * loop would then hand the freed pointer to realloc() once more.
	 */
	if (sz == 0)
		sz = 1;

	do {
		ret = realloc(orig, sz);
	} while (!ret);

	return ret;
}
2299
2300 bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val)
2301 {
2302 return (fs->f_type == (fs_type_magic)magic_val);
2303 }
2304
2305 bool has_fs_type(const char *path, fs_type_magic magic_val)
2306 {
2307 bool has_type;
2308 int ret;
2309 struct statfs sb;
2310
2311 ret = statfs(path, &sb);
2312 if (ret < 0)
2313 return false;
2314
2315 has_type = is_fs_type(&sb, magic_val);
2316 if (!has_type && magic_val == RAMFS_MAGIC)
2317 WARN("When the ramfs it a tmpfs statfs() might report tmpfs");
2318
2319 return has_type;
2320 }
2321
/*
 * Check whether the network interface @nic exists.
 *
 * The special name "none" always counts as existing. Every other name is
 * looked up as /sys/class/net/<nic>.
 *
 * Returns true if the interface exists, false if it does not or if the name
 * is too long for the path buffer.
 */
bool lxc_nic_exists(char *nic)
{
#define __LXC_SYS_CLASS_NET_LEN 15 + IFNAMSIZ + 1
	char path[__LXC_SYS_CLASS_NET_LEN];
	struct stat sb;
	int len;

	if (strcmp(nic, "none") == 0)
		return true;

	len = snprintf(path, sizeof(path), "/sys/class/net/%s", nic);
	if (len < 0 || (size_t)len >= sizeof(path))
		return false;

	return stat(path, &sb) >= 0;
}