]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
Merge pull request #2116 from brauner/2018-01-29/legacy_network_fixes
[mirror_lxc.git] / src / lxc / utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config.h"
25
26 #define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
27 #include <ctype.h>
28 #include <dirent.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <grp.h>
32 #include <inttypes.h>
33 #include <libgen.h>
34 #include <stddef.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <sys/mman.h>
40 #include <sys/mount.h>
41 #include <sys/param.h>
42 #include <sys/prctl.h>
43 #include <sys/stat.h>
44 #include <sys/types.h>
45 #include <sys/wait.h>
46
47 #include "log.h"
48 #include "lxclock.h"
49 #include "namespace.h"
50 #include "parse.h"
51 #include "utils.h"
52
53 #ifndef O_PATH
54 #define O_PATH 010000000
55 #endif
56
57 #ifndef O_NOFOLLOW
58 #define O_NOFOLLOW 00400000
59 #endif
60
61 lxc_log_define(lxc_utils, lxc);
62
63 /*
64 * if path is btrfs, tries to remove it and any subvolumes beneath it
65 */
66 extern bool btrfs_try_remove_subvol(const char *path);
67
/*
 * Delete everything below @dirname and then @dirname itself.
 *
 * @pdev:    device id that entries are compared against when @onedev is set.
 * @exclude: entry name that is treated specially at @level 0 (may be NULL).
 * @level:   recursion depth; 0 for the outermost call.
 * @onedev:  when true, entries whose st_dev differs from @pdev are not
 *           descended into (only a btrfs subvolume removal is attempted).
 *
 * Returns 0 if nothing failed, -1 otherwise.
 */
static int _recursive_rmdir(const char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	char pathname[MAXPATHLEN];
	struct dirent *entry;
	DIR *dir;
	bool failed = false;
	bool hadexclude = false;

	dir = opendir(dirname);
	if (!dir) {
		ERROR("failed to open %s", dirname);
		return -1;
	}

	while ((entry = readdir(dir)) != NULL) {
		const char *name = entry->d_name;
		struct stat sb;
		int len;

		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
			continue;

		len = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, name);
		if (len < 0 || len >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = true;
			continue;
		}

		/* The excluded entry is only honoured directly below the top directory. */
		if (level == 0 && exclude && strcmp(name, exclude) == 0) {
			if (rmdir(pathname) < 0) {
				if (errno == ENOTEMPTY) {
					/* Still populated: keep it and remember that we did. */
					INFO("Not deleting snapshot %s", pathname);
					hadexclude = true;
				} else if (errno == ENOTDIR) {
					if (unlink(pathname))
						INFO("Failed to remove %s", pathname);
				} else {
					SYSERROR("Failed to rmdir %s", pathname);
					failed = true;
				}
			}
			continue;
		}

		if (lstat(pathname, &sb)) {
			ERROR("Failed to stat %s", pathname);
			failed = true;
			continue;
		}

		if (onedev && sb.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(pathname))
				INFO("Removed btrfs subvolume at %s\n", pathname);
			continue;
		}

		if (S_ISDIR(sb.st_mode)) {
			if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0)
				failed = true;
			continue;
		}

		if (unlink(pathname) < 0) {
			SYSERROR("Failed to delete %s", pathname);
			failed = true;
		}
	}

	/* A leftover excluded entry legitimately keeps the directory alive. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) {
		ERROR("Failed to delete %s", dirname);
		failed = true;
	}

	if (closedir(dir)) {
		ERROR("Failed to close directory %s", dirname);
		failed = true;
	}

	return failed ? -1 : 0;
}
160
161 /* We have two different magic values for overlayfs, yay. */
162 #ifndef OVERLAYFS_SUPER_MAGIC
163 #define OVERLAYFS_SUPER_MAGIC 0x794c764f
164 #endif
165
166 #ifndef OVERLAY_SUPER_MAGIC
167 #define OVERLAY_SUPER_MAGIC 0x794c7630
168 #endif
169
170 /* In overlayfs, st_dev is unreliable. So on overlayfs we don't do the
171 * lxc_rmdir_onedev()
172 */
173 static bool is_native_overlayfs(const char *path)
174 {
175 if (has_fs_type(path, OVERLAY_SUPER_MAGIC) ||
176 has_fs_type(path, OVERLAYFS_SUPER_MAGIC))
177 return true;
178
179 return false;
180 }
181
/*
 * Recursively remove @path, treating the top-level entry named @exclude
 * specially (see _recursive_rmdir()).
 *
 * Returns 0 on success (including when @path does not exist), -1 if there
 * were any failures.
 */
extern int lxc_rmdir_onedev(const char *path, const char *exclude)
{
	struct stat sb;
	/* The same-device restriction is dropped on overlayfs. */
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &sb) == 0)
		return _recursive_rmdir(path, sb.st_dev, exclude, 0, onedev);

	/* Nothing there means nothing to delete. */
	if (errno == ENOENT)
		return 0;

	ERROR("Failed to stat %s", path);
	return -1;
}
201
/*
 * Parse @arg as an unsigned 16-bit number in the given @base and store it
 * in @val. (borrowed from iproute2)
 *
 * Returns 0 on success. Returns -1, leaving @val untouched, when @arg is
 * NULL or empty, contains trailing garbage, cannot be converted, or the
 * value does not fit into 16 bits.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);
	if (errno != 0)
		return -1;

	/* Require that at least one digit was consumed and nothing follows it. */
	if (end == NULL || end == arg || *end != '\0')
		return -1;

	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;
	return 0;
}
220
/*
 * Create @dir and every missing parent directory, each with @mode.
 *
 * The path is walked one component at a time; every prefix ending at a
 * component boundary is passed to mkdir(). Already existing directories
 * (EEXIST) are not treated as errors.
 *
 * Returns 0 on success, -1 on failure (mkdir() error or out of memory).
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* Skip the separator(s), then advance to the end of the next component. */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		makeme = strndup(orig, dir - orig);
		if (!makeme)
			return -1;

		/* The prefix is empty on the first round of a relative path. */
		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
243
244 char *get_rundir()
245 {
246 char *rundir;
247 const char *homedir;
248 struct stat sb;
249
250 if (stat(RUNTIME_PATH, &sb) < 0) {
251 return NULL;
252 }
253
254 if (geteuid() == sb.st_uid || getegid() == sb.st_gid) {
255 rundir = strdup(RUNTIME_PATH);
256 return rundir;
257 }
258
259 rundir = getenv("XDG_RUNTIME_DIR");
260 if (rundir) {
261 rundir = strdup(rundir);
262 return rundir;
263 }
264
265 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
266 homedir = getenv("HOME");
267 if (!homedir) {
268 ERROR("HOME isn't set in the environment.");
269 return NULL;
270 }
271
272 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
273 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
274
275 return rundir;
276 }
277
/*
 * Wait for process @pid to terminate, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns 0 only if the process exited normally with status 0; returns -1
 * if waitpid() failed or the process terminated in any other way.
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}

		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
295
/*
 * Wait for process @pid to terminate, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns the raw wait status as filled in by waitpid() (to be inspected
 * with the W* macros), or -1 if waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == pid)
			return status;

		if (reaped == -1 && errno != EINTR)
			return -1;
	}
}
311
/*
 * write() wrapper that retries for as long as the call fails with EINTR.
 *
 * Returns whatever the final write() call returned.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
321
/*
 * read() wrapper that retries for as long as the call fails with EINTR.
 *
 * Returns whatever the final read() call returned.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t nread;

	do {
		nread = read(fd, buf, count);
	} while (nread < 0 && errno == EINTR);

	return nread;
}
331
/*
 * Read exactly @count bytes from @fd into @buf and, when @expected_buf is
 * not NULL, verify that they equal the first @count bytes of it.
 *
 * Returns:
 *   - the result of the underlying read when it is <= 0 (EOF or error),
 *   - -1 when fewer than @count bytes were read,
 *   - -1 with errno set to EINVAL when the data differs from @expected_buf,
 *   - @count on success.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t got = lxc_read_nointr(fd, buf, count);

	if (got <= 0)
		return got;

	if ((size_t)got != count)
		return -1;

	if (expected_buf != NULL) {
		if (memcmp(buf, expected_buf, count) != 0) {
			errno = EINVAL;
			return -1;
		}
	}

	return got;
}
346
347 #if HAVE_LIBGNUTLS
348 #include <gnutls/gnutls.h>
349 #include <gnutls/crypto.h>
350
/*
 * Constructor that calls gnutls_global_init(); declared with
 * __attribute__((constructor)). The return value of the initialisation
 * call is deliberately not inspected.
 */
__attribute__((constructor))
static void gnutls_lxc_init(void)
{
	(void)gnutls_global_init();
}
356
357 int sha1sum_file(char *fnam, unsigned char *digest)
358 {
359 char *buf;
360 int ret;
361 FILE *f;
362 long flen;
363
364 if (!fnam)
365 return -1;
366 f = fopen_cloexec(fnam, "r");
367 if (!f) {
368 SYSERROR("Error opening template");
369 return -1;
370 }
371 if (fseek(f, 0, SEEK_END) < 0) {
372 SYSERROR("Error seeking to end of template");
373 fclose(f);
374 return -1;
375 }
376 if ((flen = ftell(f)) < 0) {
377 SYSERROR("Error telling size of template");
378 fclose(f);
379 return -1;
380 }
381 if (fseek(f, 0, SEEK_SET) < 0) {
382 SYSERROR("Error seeking to start of template");
383 fclose(f);
384 return -1;
385 }
386 if ((buf = malloc(flen+1)) == NULL) {
387 SYSERROR("Out of memory");
388 fclose(f);
389 return -1;
390 }
391 if (fread(buf, 1, flen, f) != flen) {
392 SYSERROR("Failure reading template");
393 free(buf);
394 fclose(f);
395 return -1;
396 }
397 if (fclose(f) < 0) {
398 SYSERROR("Failre closing template");
399 free(buf);
400 return -1;
401 }
402 buf[flen] = '\0';
403 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
404 free(buf);
405 return ret;
406 }
407 #endif
408
/*
 * Convert a NULL-terminated list of `char *` variadic arguments into a
 * NULL-terminated argv-style array.
 *
 * @ap:        argument list; consumed up to and including the NULL sentinel.
 * @skip:      number of leading slots to leave empty (NULL) in the result
 *             so the caller can fill them in afterwards.
 * @do_strdup: when non-zero every argument is duplicated with strdup(),
 *             otherwise the original pointers are stored.
 *
 * Returns the newly allocated array, or NULL on allocation failure. On
 * failure nothing is leaked: strings that were already duplicated are
 * released together with the array.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	char **result;

	/* first determine size of argument list, we don't want to reallocate
	 * constantly...
	 */
	va_copy(ap2, ap);
	while (va_arg(ap2, char *) != NULL)
		count++;
	va_end(ap2);

	result = calloc(count, sizeof(char *));
	if (!result)
		return NULL;

	count = skip;
	for (;;) {
		char *arg = va_arg(ap, char *);

		if (!arg)
			break;

		if (do_strdup) {
			arg = strdup(arg);
			if (!arg)
				goto oom;
		}

		result[count++] = arg;
	}

	/* calloc has already set last element to NULL*/
	return result;

oom:
	/* Slots skip..count-1 hold copies we own; the first @skip slots are NULL. */
	if (do_strdup) {
		size_t i;

		for (i = skip; i < count; i++)
			free(result[i]);
	}
	free(result);
	return NULL;
}
448
/*
 * Same as lxc_va_arg_list_to_argv() without duplicating the strings; the
 * result is handed back as an array of const pointers.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	char **argv = lxc_va_arg_list_to_argv(ap, skip, 0);

	return (const char **)argv;
}
453
454 struct lxc_popen_FILE *lxc_popen(const char *command)
455 {
456 int ret;
457 int pipe_fds[2];
458 pid_t child_pid;
459 struct lxc_popen_FILE *fp = NULL;
460
461 ret = pipe2(pipe_fds, O_CLOEXEC);
462 if (ret < 0)
463 return NULL;
464
465 child_pid = fork();
466 if (child_pid < 0)
467 goto on_error;
468
469 if (!child_pid) {
470 sigset_t mask;
471
472 close(pipe_fds[0]);
473
474 /* duplicate stdout */
475 if (pipe_fds[1] != STDOUT_FILENO)
476 ret = dup2(pipe_fds[1], STDOUT_FILENO);
477 else
478 ret = fcntl(pipe_fds[1], F_SETFD, 0);
479 if (ret < 0) {
480 close(pipe_fds[1]);
481 exit(EXIT_FAILURE);
482 }
483
484 /* duplicate stderr */
485 if (pipe_fds[1] != STDERR_FILENO)
486 ret = dup2(pipe_fds[1], STDERR_FILENO);
487 else
488 ret = fcntl(pipe_fds[1], F_SETFD, 0);
489 close(pipe_fds[1]);
490 if (ret < 0)
491 exit(EXIT_FAILURE);
492
493 /* unblock all signals */
494 ret = sigfillset(&mask);
495 if (ret < 0)
496 exit(EXIT_FAILURE);
497
498 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
499 if (ret < 0)
500 exit(EXIT_FAILURE);
501
502 execl("/bin/sh", "sh", "-c", command, (char *)NULL);
503 exit(127);
504 }
505
506 close(pipe_fds[1]);
507 pipe_fds[1] = -1;
508
509 fp = malloc(sizeof(*fp));
510 if (!fp)
511 goto on_error;
512
513 fp->child_pid = child_pid;
514 fp->pipe = pipe_fds[0];
515
516 fp->f = fdopen(pipe_fds[0], "r");
517 if (!fp->f)
518 goto on_error;
519
520 return fp;
521
522 on_error:
523 if (fp)
524 free(fp);
525
526 if (pipe_fds[0] >= 0)
527 close(pipe_fds[0]);
528
529 if (pipe_fds[1] >= 0)
530 close(pipe_fds[1]);
531
532 return NULL;
533 }
534
535 int lxc_pclose(struct lxc_popen_FILE *fp)
536 {
537 pid_t wait_pid;
538 int wstatus = 0;
539
540 if (!fp)
541 return -1;
542
543 do {
544 wait_pid = waitpid(fp->child_pid, &wstatus, 0);
545 } while (wait_pid < 0 && errno == EINTR);
546
547 close(fp->pipe);
548 fclose(fp->f);
549 free(fp);
550
551 if (wait_pid < 0)
552 return -1;
553
554 return wstatus;
555 }
556
/*
 * Return a newly allocated copy of @haystack in which every
 * non-overlapping occurrence of @needle (searched left to right) is
 * replaced by @replacement.
 *
 * An empty @needle cannot be replaced meaningfully; in that case an
 * unmodified copy of @haystack is returned.
 *
 * Returns NULL on allocation failure. The caller frees the result.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t result_len = strlen(haystack);
	const char *cur, *hit;
	char *result, *out;

	/* First pass: work out the length of the result. */
	if (needle_len > 0) {
		for (cur = haystack; (hit = strstr(cur, needle)); cur = hit + needle_len)
			result_len = result_len - needle_len + replacement_len;
	}

	/* calloc() zeroes the buffer, which also terminates the string. */
	result = calloc(1, result_len + 1);
	if (!result)
		return NULL;

	/* Second pass: copy the gaps between matches and the replacements. */
	out = result;
	cur = haystack;
	if (needle_len > 0) {
		while ((hit = strstr(cur, needle))) {
			size_t gap = (size_t)(hit - cur);

			memcpy(out, cur, gap);
			out += gap;
			memcpy(out, replacement, replacement_len);
			out += replacement_len;
			cur = hit + needle_len;
		}
	}

	/* Whatever follows the last match is copied verbatim. */
	memcpy(out, cur, strlen(cur));

	return result;
}
611
/*
 * Check whether @needle equals one of the strings in the NULL-terminated
 * array @haystack. A NULL @haystack contains nothing.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	size_t i;

	if (!haystack)
		return false;

	for (i = 0; haystack[i] != NULL; i++) {
		if (strcmp(needle, haystack[i]) == 0)
			return true;
	}

	return false;
}
619
/*
 * Concatenate the strings of the NULL-terminated array @parts, placing
 * @sep between consecutive elements.
 *
 * @use_as_prefix: when true, @sep is additionally placed in front of the
 *                 result (even if @parts is empty).
 *
 * A NULL @parts is treated like an empty array. Returns a newly allocated
 * string, or NULL on allocation failure.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	const char **p;
	char *result, *out;
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;

	/* calculate new string length */
	for (p = parts; p && *p; p++) {
		if (p != parts)
			result_len += sep_len;
		result_len += strlen(*p);
	}

	result = calloc(result_len + 1, 1);
	if (!result)
		return NULL;

	/*
	 * Append through a moving write pointer so the already written part
	 * is not rescanned for every element.
	 */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}

	for (p = parts; p && *p; p++) {
		size_t part_len = strlen(*p);

		if (p != parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}
		memcpy(out, *p, part_len);
		out += part_len;
	}

	/* The terminating NUL byte comes from calloc(). */
	return result;
}
645
/*
 * Split @path at '/' and resolve "." and ".." components in place.
 *
 * "." is dropped, ".." removes itself together with the preceding
 * component, and a ".." that has no preceding component is dropped.
 *
 * Returns the NULL-terminated array of remaining components (possibly
 * empty), or NULL if splitting failed.
 */
char **lxc_normalize_path(const char *path)
{
	char **parts;
	size_t count = 0;
	size_t idx = 0;

	parts = lxc_string_split(path, '/');
	if (!parts)
		return NULL;

	while (parts[count])
		count++;

	/* resolve '.' and '..' */
	while (idx < count) {
		bool is_dot = strcmp(parts[idx], ".") == 0;
		bool is_dotdot = strcmp(parts[idx], "..") == 0;

		if (is_dot || (is_dotdot && idx == 0)) {
			/* eat this element; the move also carries the NULL terminator */
			free(parts[idx]);
			memmove(&parts[idx], &parts[idx + 1], sizeof(char *) * (count - idx));
			count--;
			continue;
		}

		if (is_dotdot) {
			/* eat this and the previous element */
			free(parts[idx - 1]);
			free(parts[idx]);
			memmove(&parts[idx - 1], &parts[idx + 1], sizeof(char *) * (count - idx));
			count -= 2;
			idx--;
			continue;
		}

		idx++;
	}

	return parts;
}
680
/*
 * Return a newly allocated, normalized version of @path: components as
 * produced by lxc_normalize_path() joined with single '/' characters,
 * prefixed with '/' when @path itself starts with '/'.
 *
 * Special cases when no component is left after normalization:
 *   - an empty @path yields an empty string,
 *   - a non-empty @path without any '/' yields "/".
 *
 * Returns NULL on allocation failure.
 */
char *lxc_deslashify(const char *path)
{
	char *copy, *joined;
	char **parts;

	copy = strdup(path);
	if (!copy)
		return NULL;

	parts = lxc_normalize_path(copy);
	if (!parts) {
		free(copy);
		return NULL;
	}

	/* No component survived normalization. */
	if (*parts == NULL) {
		size_t len = strlen(copy);

		if (len == 0) {
			lxc_free_array((void **)parts, free);
			return copy;
		}

		if (strcspn(copy, "/") == len) {
			free(copy);
			lxc_free_array((void **)parts, free);
			return strdup("/");
		}
	}

	joined = lxc_string_join("/", (const char **)parts, *copy == '/');
	free(copy);
	lxc_free_array((void **)parts, free);

	return joined;
}
722
/*
 * Concatenate @first and @second into a newly allocated string, inserting
 * a '/' between them unless @second already starts with one.
 *
 * Returns NULL on allocation or formatting failure.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	bool need_slash = second[0] != '/';
	size_t len = strlen(first) + strlen(second) + 1;
	char *result;
	int ret;

	if (need_slash)
		len += 1;

	result = calloc(1, len);
	if (!result)
		return NULL;

	if (need_slash)
		ret = snprintf(result, len, "%s/%s", first, second);
	else
		ret = snprintf(result, len, "%s%s", first, second);

	if (ret < 0 || (size_t)ret >= len) {
		free(result);
		return NULL;
	}

	return result;
}
748
/*
 * Check whether @needle is one of the non-empty tokens obtained by
 * splitting @haystack at the separator character @_sep.
 *
 * Empty tokens (consecutive, leading or trailing separators) are ignored,
 * so an empty @needle never matches. Returns false if either string is
 * NULL.
 *
 * The haystack is scanned in place, without copying it, so the length of
 * @haystack does not affect stack usage.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	size_t needle_len;
	const char *start;

	if (!haystack || !needle)
		return false;

	needle_len = strlen(needle);

	start = haystack;
	while (*start) {
		const char *end = start;
		size_t token_len;

		/* Find the end of the current token. */
		while (*end && *end != _sep)
			end++;

		token_len = (size_t)(end - start);
		if (token_len > 0 && token_len == needle_len &&
		    memcmp(start, needle, token_len) == 0)
			return true;

		if (!*end)
			break;

		start = end + 1;
	}

	return false;
}
766
/*
 * Split @string at every occurrence of @_sep and return the non-empty
 * tokens as a newly allocated, NULL-terminated array of newly allocated
 * strings.
 *
 * A NULL @string yields an empty array. Returns NULL on allocation
 * failure, with errno preserved from the failing call.
 */
char **lxc_string_split(const char *string, char _sep)
{
	char delim[2] = {_sep, '\0'};
	char **list = NULL, **shrunk;
	char *copy, *tok, *state = NULL;
	size_t capacity = 0;
	size_t used = 0;
	int saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so tokenize a private copy. */
	copy = alloca(strlen(string) + 1);
	strcpy(copy, string);

	tok = strtok_r(copy, delim, &state);
	while (tok) {
		if (lxc_grow_array((void ***)&list, &capacity, used + 1, 16) < 0)
			goto error_out;

		list[used] = strdup(tok);
		if (!list[used])
			goto error_out;
		used++;

		tok = strtok_r(NULL, delim, &state);
	}

	/* if we allocated too much, reduce it */
	shrunk = realloc(list, (used + 1) * sizeof(char *));
	if (!shrunk)
		goto error_out;

	/* Make sure we don't return uninitialized memory. */
	if (used == 0)
		shrunk[0] = NULL;

	return shrunk;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)list, free);
	errno = saved_errno;
	return NULL;
}
806
/*
 * Append a copy of the characters in [@start, @end) to the string array
 * *@result, growing it as needed.
 *
 * @cap and @cnt track the array's capacity and number of stored words;
 * *@cnt is incremented on success.
 *
 * Returns true on success, false if growing the array or copying failed.
 */
static bool complete_word(char ***result, char *start, char *end, size_t *cap, size_t *cnt)
{
	char *word;

	if (lxc_grow_array((void ***)result, cap, 2 + *cnt, 16) < 0)
		return false;

	word = strndup(start, end - start);
	(*result)[*cnt] = word;
	if (!word)
		return false;

	*cnt += 1;
	return true;
}
821
/*
 * Given a string 'one two "three four"', split into three words,
 * one, two, and "three four" (returned without the quotes).
 *
 * Words are separated by whitespace; a word starting with a single or
 * double quote extends to the matching closing quote. Escaped quotes are
 * not handled, and a quoted word that is never closed is dropped.
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings (empty for a NULL, empty or whitespace-only @string), or NULL
 * on allocation failure.
 */
char **lxc_string_split_quoted(char *string)
{
	char *nextword = string, *p, state;
	char **result = NULL, **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int saved_errno;

	if (!string || !*string)
		return calloc(1, sizeof(char *));

	/*
	 * state is ' ' between words, 'a' inside an unquoted word, or the
	 * quote character while inside a quoted word.
	 */
	// TODO I'm *not* handling escaped quote
	state = ' ';
	for (p = string; *p; p++) {
		switch (state) {
		case ' ':
			/* cast: <ctype.h> functions require an unsigned char value */
			if (isspace((unsigned char)*p))
				continue;
			nextword = p;
			state = (*p == '"' || *p == '\'') ? *p : 'a';
			continue;
		case 'a':
			if (!isspace((unsigned char)*p))
				continue;
			if (!complete_word(&result, nextword, p, &result_capacity, &result_count))
				goto error_out;
			state = ' ';
			continue;
		case '"':
		case '\'':
			if (*p != state)
				continue;
			/* nextword points at the opening quote; skip it */
			if (!complete_word(&result, nextword + 1, p, &result_capacity, &result_count))
				goto error_out;
			state = ' ';
			continue;
		}
	}

	/* An unquoted word may run up to the end of the string. */
	if (state == 'a' &&
	    !complete_word(&result, nextword, p, &result_capacity, &result_count))
		goto error_out;

	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	if (!shrunk)
		goto error_out;

	/* Also covers the case where no word was found and result was NULL. */
	shrunk[result_count] = NULL;
	return shrunk;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
874
/*
 * Split @string at every occurrence of @_sep, strip leading and trailing
 * spaces and tabs from each token, and return the tokens as a newly
 * allocated, NULL-terminated array of newly allocated strings.
 *
 * A NULL @string, or one without any token, yields an empty array.
 * Returns NULL on allocation failure, with errno preserved.
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char *token, *str, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **result = NULL, **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;
	size_t i = 0;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so tokenize a private copy. */
	str = alloca(strlen(string)+1);
	strcpy(str, string);
	for (; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		/* trim leading blanks */
		while (token[0] == ' ' || token[0] == '\t')
			token++;

		/* trim trailing blanks */
		i = strlen(token);
		while (i > 0 && (token[i - 1] == ' ' || token[i - 1] == '\t')) {
			token[i - 1] = '\0';
			i--;
		}

		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}

	/* if we allocated too much, reduce it */
	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	if (!shrunk)
		goto error_out;

	/*
	 * Terminate explicitly: with zero tokens result was NULL and
	 * realloc() handed back uninitialized memory.
	 */
	shrunk[result_count] = NULL;
	return shrunk;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
915
916 void lxc_free_array(void **array, lxc_free_fn element_free_fn)
917 {
918 void **p;
919 for (p = array; p && *p; p++)
920 element_free_fn(*p);
921 free((void*)array);
922 }
923
/*
 * Make sure the pointer array *@array can hold @new_size elements plus a
 * terminating NULL entry.
 *
 * @capacity:           in/out, number of slots currently allocated.
 * @capacity_increment: the capacity grows in multiples of this value;
 *                      must not be 0 when growth is required.
 *
 * Newly added slots are zero-initialized. If either *@array is NULL or
 * *@capacity is 0, both are reset and the array is treated as empty.
 *
 * Returns 0 on success. Returns -1 with errno set when the increment is 0
 * (EINVAL), the requested size overflows (ENOMEM) or realloc() fails; the
 * array and capacity are left unchanged in that case.
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	new_capacity = *capacity;
	while (new_size + 1 > new_capacity) {
		/* A zero step would never reach the requested size. */
		if (capacity_increment == 0) {
			errno = EINVAL;
			return -1;
		}
		if (new_capacity > SIZE_MAX - capacity_increment) {
			errno = ENOMEM;
			return -1;
		}
		new_capacity += capacity_increment;
	}

	if (new_capacity != *capacity) {
		if (new_capacity > SIZE_MAX / sizeof(void *)) {
			errno = ENOMEM;
			return -1;
		}

		/* we have to reallocate */
		new_array = realloc(*array, new_capacity * sizeof(void *));
		if (!new_array)
			return -1;

		/* Zero only the slots that were added. */
		memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
		*array = new_array;
		*capacity = new_capacity;
	}

	/* array has sufficient elements */
	return 0;
}
952
/*
 * Count the elements of a NULL-terminated pointer array, not including
 * the terminator. A NULL @array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t n = 0;

	if (!array)
		return 0;

	while (array[n] != NULL)
		n++;

	return n;
}
963
964 int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
965 {
966 int fd, saved_errno;
967 ssize_t ret;
968
969 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
970 if (fd < 0)
971 return -1;
972 ret = lxc_write_nointr(fd, buf, count);
973 if (ret < 0)
974 goto out_error;
975 if ((size_t)ret != count)
976 goto out_error;
977 if (add_newline) {
978 ret = lxc_write_nointr(fd, "\n", 1);
979 if (ret != 1)
980 goto out_error;
981 }
982 close(fd);
983 return 0;
984
985 out_error:
986 saved_errno = errno;
987 close(fd);
988 errno = saved_errno;
989 return -1;
990 }
991
992 int lxc_read_from_file(const char *filename, void* buf, size_t count)
993 {
994 int fd = -1, saved_errno;
995 ssize_t ret;
996
997 fd = open(filename, O_RDONLY | O_CLOEXEC);
998 if (fd < 0)
999 return -1;
1000
1001 if (!buf || !count) {
1002 char buf2[100];
1003 size_t count2 = 0;
1004 while ((ret = read(fd, buf2, 100)) > 0)
1005 count2 += ret;
1006 if (ret >= 0)
1007 ret = count2;
1008 } else {
1009 memset(buf, 0, count);
1010 ret = read(fd, buf, count);
1011 }
1012
1013 if (ret < 0)
1014 ERROR("read %s: %s", filename, strerror(errno));
1015
1016 saved_errno = errno;
1017 close(fd);
1018 errno = saved_errno;
1019 return ret;
1020 }
1021
/*
 * Grow @array, which holds @count elements, by one slot and store NULL in
 * it so the array becomes NULL-terminated.
 *
 * With @count == 0 the array is returned untouched. If the reallocation
 * fails, all @count elements and the array itself are freed and NULL is
 * returned.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **grown;
	size_t i;

	if (count == 0)
		return array;

	/* Append NULL to the array */
	grown = realloc(array, (count + 1) * sizeof(*array));
	if (grown) {
		grown[count] = NULL;
		return grown;
	}

	for (i = 0; i < count; i++)
		free(array[i]);
	free(array);

	return NULL;
}
1041
/*
 * Produce a seed for the C random number generator.
 *
 * The seed is read from /dev/urandom; the sum of the current time and the
 * pid is the starting value and remains in place if the device cannot be
 * opened. When @srand_it is set, srand() is called with the seed.
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	FILE *urandom;
	unsigned int seed = time(NULL) + getpid();

	urandom = fopen("/dev/urandom", "r");
	if (urandom) {
		int items = fread(&seed, sizeof(seed), 1, urandom);

		if (items != 1)
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
1063
/*
 * Translate the uid @orig through /proc/self/uid_map.
 *
 * Each line of the map is parsed as "<nsid> <hostid> <range>". For the
 * first line whose [hostid, hostid + range) interval contains @orig, the
 * corresponding id (nsid + offset into the range) is returned.
 *
 * Returns 0 if the map cannot be opened or no line matches.
 */
uid_t get_ns_uid(uid_t orig)
{
	char *line = NULL;
	size_t sz = 0;
	uid_t mapped = 0;
	FILE *f;

	f = fopen("/proc/self/uid_map", "r");
	if (!f)
		return 0;

	while (getline(&line, &sz, f) != -1) {
		uid_t nsid, hostid, range;

		if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
			continue;

		/* Skip ranges that do not cover @orig. */
		if (orig < hostid || hostid + range <= orig)
			continue;

		mapped = nsid + (orig - hostid);
		break;
	}

	fclose(f);
	free(line);
	return mapped;
}
1088
/*
 * Return true if @path can be stat()ed and refers to a directory
 * (symbolic links are followed by stat()).
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	/* Any stat() failure, whatever the reason, counts as "no". */
	if (stat(path, &sb) < 0)
		return false;

	return S_ISDIR(sb.st_mode) ? true : false;
}
1100
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 *
 * Fold the @len bytes at @buf into the running hash value @hval and
 * return the updated value. For every byte the hash is xor-ed with the
 * byte and then multiplied (mod 2^64) by the 64 bit FNV prime.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64;
		 * 0x100000001b3 == 2^40 + 2^8 + 2^7 + 2^5 + 2^4 + 2^1 + 2^0
		 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
1124
1125 /*
1126 * Detect whether / is mounted MS_SHARED. The only way I know of to
1127 * check that is through /proc/self/mountinfo.
1128 * I'm only checking for /. If the container rootfs or mount location
1129 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1130 * out would be too much work to be worth it.
1131 */
1132 int detect_shared_rootfs(void)
1133 {
1134 char buf[LXC_LINELEN], *p;
1135 FILE *f;
1136 int i;
1137 char *p2;
1138
1139 f = fopen("/proc/self/mountinfo", "r");
1140 if (!f)
1141 return 0;
1142 while (fgets(buf, LXC_LINELEN, f)) {
1143 for (p = buf, i = 0; p && i < 4; i++)
1144 p = strchr(p + 1, ' ');
1145 if (!p)
1146 continue;
1147 p2 = strchr(p + 1, ' ');
1148 if (!p2)
1149 continue;
1150 *p2 = '\0';
1151 if (strcmp(p + 1, "/") == 0) {
1152 /* This is '/'. Is it shared? */
1153 p = strchr(p2 + 1, ' ');
1154 if (p && strstr(p, "shared:")) {
1155 fclose(f);
1156 return 1;
1157 }
1158 }
1159 }
1160 fclose(f);
1161 return 0;
1162 }
1163
/*
 * Enter the namespace @ns (a file name below /proc/<pid>/ns/) of the
 * process @pid by opening that file and passing it to setns().
 *
 * Returns true on success, false if the path is too long, the namespace
 * file cannot be opened, or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns) {
	char nspath[MAXPATHLEN];
	int nsfd, len, rc;

	/* Switch to new ns */
	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	rc = setns(nsfd, 0);
	if (rc)
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, nsfd);

	/* The descriptor is no longer needed either way. */
	close(nsfd);

	return rc == 0;
}
1188
/*
 * looking at fs/proc_namespace.c, it appears we can
 * actually expect the rootfs entry to very specifically contain
 * " - rootfs rootfs "
 * IIUC, so long as we've chrooted so that rootfs is not our root,
 * the rootfs entry should always be skipped in mountinfo contents.
 *
 * Returns true if /proc/self/mountinfo has an entry whose fifth field is
 * "/" and whose remainder contains "- rootfs rootfs " at the first '-',
 * false otherwise (including when the file cannot be opened).
 */
bool detect_ramfs_rootfs(void)
{
	FILE *f;
	char *line = NULL;
	size_t len = 0;
	bool found = false;

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		char *field = line, *end, *marker;
		int skipped;

		/* Advance to the space in front of the fifth field. */
		for (skipped = 0; field && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (!field)
			continue;

		end = strchr(field + 1, ' ');
		if (!end)
			continue;
		*end = '\0';

		if (strcmp(field + 1, "/") != 0)
			continue;

		/* This is '/'. Is it the ramfs? */
		marker = strchr(end + 1, '-');
		if (marker && strncmp(marker, "- rootfs rootfs ", 16) == 0) {
			found = true;
			break;
		}
	}

	free(line);
	fclose(f);
	return found;
}
1231
/*
 * Search the directories listed in $PATH for an executable named @cmd.
 * When @rootfs is not NULL, every directory is looked up below @rootfs.
 *
 * Returns a newly allocated copy of the first matching path (including
 * the @rootfs prefix, if any), or NULL if PATH is unset, nothing was
 * found, or memory could not be allocated.
 */
char *on_path(const char *cmd, const char *rootfs) {
	char cmdpath[MAXPATHLEN];
	char *found = NULL;
	char *pathcopy, *dir;
	char *state = NULL;
	const char *env;

	env = getenv("PATH");
	if (!env)
		return NULL;

	/* strtok_r() modifies its input, so work on a copy of $PATH. */
	pathcopy = strdup(env);
	if (!pathcopy)
		return NULL;

	for (dir = strtok_r(pathcopy, ":", &state); dir;
	     dir = strtok_r(NULL, ":", &state)) {
		int len;

		if (rootfs)
			len = snprintf(cmdpath, MAXPATHLEN, "%s/%s/%s", rootfs, dir, cmd);
		else
			len = snprintf(cmdpath, MAXPATHLEN, "%s/%s", dir, cmd);

		/* Candidates that do not fit are skipped. */
		if (len < 0 || len >= MAXPATHLEN)
			continue;

		if (access(cmdpath, X_OK) != 0)
			continue;

		found = strdup(cmdpath);
		break;
	}

	free(pathcopy);
	return found;
}
1269
/*
 * Return true if stat() succeeds on @f, i.e. the path exists and can be
 * examined (symbolic links are followed by stat()).
 */
bool file_exists(const char *f)
{
	struct stat sb;

	if (stat(f, &sb) != 0)
		return false;

	return true;
}
1276
/*
 * Report whether /proc/self/ns/cgroup exists, which this code uses as the
 * indicator for cgroup namespace support.
 */
bool cgns_supported(void)
{
	static const char cgroup_ns_path[] = "/proc/self/ns/cgroup";

	return file_exists(cgroup_ns_path);
}
1281
1282 /* historically lxc-init has been under /usr/lib/lxc and under
1283 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1284 */
1285 char *choose_init(const char *rootfs)
1286 {
1287 char *retv = NULL;
1288 const char *empty = "",
1289 *tmp;
1290 int ret, env_set = 0;
1291
1292 if (!getenv("PATH")) {
1293 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1294 SYSERROR("Failed to setenv");
1295 env_set = 1;
1296 }
1297
1298 retv = on_path("init.lxc", rootfs);
1299
1300 if (env_set) {
1301 if (unsetenv("PATH"))
1302 SYSERROR("Failed to unsetenv");
1303 }
1304
1305 if (retv)
1306 return retv;
1307
1308 retv = malloc(PATH_MAX);
1309 if (!retv)
1310 return NULL;
1311
1312 if (rootfs)
1313 tmp = rootfs;
1314 else
1315 tmp = empty;
1316
1317 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
1318 if (ret < 0 || ret >= PATH_MAX) {
1319 ERROR("pathname too long");
1320 goto out1;
1321 }
1322 if (access(retv, X_OK) == 0)
1323 return retv;
1324
1325 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
1326 if (ret < 0 || ret >= PATH_MAX) {
1327 ERROR("pathname too long");
1328 goto out1;
1329 }
1330 if (access(retv, X_OK) == 0)
1331 return retv;
1332
1333 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
1334 if (ret < 0 || ret >= PATH_MAX) {
1335 ERROR("pathname too long");
1336 goto out1;
1337 }
1338 if (access(retv, X_OK) == 0)
1339 return retv;
1340
1341 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
1342 if (ret < 0 || ret >= PATH_MAX) {
1343 ERROR("pathname too long");
1344 goto out1;
1345 }
1346 if (access(retv, X_OK) == 0)
1347 return retv;
1348
1349 /*
1350 * Last resort, look for the statically compiled init.lxc which we
1351 * hopefully bind-mounted in.
1352 * If we are called during container setup, and we get to this point,
1353 * then the init.lxc.static from the host will need to be bind-mounted
1354 * in. So we return NULL here to indicate that.
1355 */
1356 if (rootfs)
1357 goto out1;
1358
1359 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
1360 if (ret < 0 || ret >= PATH_MAX) {
1361 WARN("Nonsense - name /lxc.init.static too long");
1362 goto out1;
1363 }
1364 if (access(retv, X_OK) == 0)
1365 return retv;
1366
1367 out1:
1368 free(retv);
1369 return NULL;
1370 }
1371
/*
 * Write the string @content to @file, creating or truncating it.
 *
 * Returns 0 when the whole string was written and the stream was closed
 * without error, -1 otherwise.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int written;
	int ret = 0;

	f = fopen(file, "w");
	if (!f)
		return -1;

	written = fprintf(f, "%s", content);
	if (written < 0 || (size_t)written != strlen(content))
		ret = -1;

	/*
	 * The stream is buffered, so a failing write may only be reported
	 * when the buffer is flushed by fclose().
	 */
	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
1385
/* Return 1 when @path can be stat()ed and names a directory, 0 otherwise. */
int is_dir(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) ? 1 : 0;
}
1394
1395 /*
1396 * Given the '-t' template option to lxc-create, figure out what to
1397 * do. If the template is a full executable path, use that. If it
1398 * is something like 'sshd', then return $templatepath/lxc-sshd.
1399 * On success return the template, on error return NULL.
1400 */
1401 char *get_template_path(const char *t)
1402 {
1403 int ret, len;
1404 char *tpath;
1405
1406 if (t[0] == '/' && access(t, X_OK) == 0) {
1407 tpath = strdup(t);
1408 return tpath;
1409 }
1410
1411 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
1412 tpath = malloc(len);
1413 if (!tpath)
1414 return NULL;
1415 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
1416 if (ret < 0 || ret >= len) {
1417 free(tpath);
1418 return NULL;
1419 }
1420 if (access(tpath, X_OK) < 0) {
1421 SYSERROR("bad template: %s", t);
1422 free(tpath);
1423 return NULL;
1424 }
1425
1426 return tpath;
1427 }
1428
/*
 * @path: a pathname where / replaced with '\0'.
 * @offsetp: pointer to int showing which path segment was last seen.
 *           Updated on return to reflect the next segment.
 * @fulllen: full original path length.
 * Returns a pointer to the next path segment, or NULL if done.
 *
 * No byte at or beyond @fulllen is ever read: the bounds check is evaluated
 * before each dereference.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int offset = *offsetp;

	if (offset >= fulllen)
		return NULL;

	/* Step over whatever is left of the current segment ... */
	while (offset < fulllen && path[offset] != '\0')
		offset++;

	/* ... and over the run of separators that follows it. */
	while (offset < fulllen && path[offset] == '\0')
		offset++;

	*offsetp = offset;
	return (offset < fulllen) ? &path[offset] : NULL;
}
1451
/*
 * Tell whether @subdir lies at or below @dir. @len is strlen(@dir), passed
 * in so it does not need to be computed again.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t sublen = strlen(subdir);

	/* @subdir has to begin with all of @dir. */
	if (sublen < len || strncmp(subdir, dir, len) != 0)
		return false;

	/* A prefix that ends in '/' already stops on a component boundary. */
	if (dir[len - 1] == '/')
		return true;

	/* Otherwise the prefix must end where a component of @subdir ends. */
	return subdir[len] == '/' || sublen == len;
}
1470
/*
 * Inspect the open file descriptor @fd.
 *
 * Returns -ENOENT when it cannot be fstat()ed, -ELOOP when it refers to a
 * symbolic link, and 0 otherwise.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
1485
/*
 * Open @nextpath relative to @dirfd, refusing to follow a symlink in the
 * final component.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns the new fd, or a negative value on failure.
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int fd, ret;

	fd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
	if (fd >= 0)
		return fd; /* Not a symlink, nothing more to check. */

	/*
	 * Only a permission problem is worth a second attempt; every other
	 * error, ELOOP included, is handed straight back to the caller.
	 */
	if (errno != EPERM && errno != EACCES)
		return fd;

	/* We are presumably not root, so retry with O_PATH. */
	fd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (fd < 0)
		return fd;

	/*
	 * O_PATH hands out an fd even for a symlink. The first openat() did
	 * not report one, so finding a link now means something fishy is
	 * going on.
	 */
	ret = check_symlink(fd);
	if (ret < 0) {
		close(fd);
		return ret;
	}

	return fd;
}
1521
/*
 * Open @target one path component at a time so that no component below
 * @prefix_skip may be a symbolic link. Intended for opening mount targets
 * that must stay inside the container's rootfs.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target:      path to be opened
 * @prefix_skip: leading part of @target in which symbolic links are not
 *               checked (the container's rootfs). NULL or "" means "/".
 *
 * Returns an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	char *tokens, *p, *component;
	int fd, nextfd, err;
	int pos = 0;
	int total = strlen(target);

	if (!prefix_skip || strlen(prefix_skip) == 0) {
		prefix_skip = "/";
	} else {
		/* make sure prefix-skip makes sense */
		pos = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, pos)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
			      target, prefix_skip);
			return -EINVAL;
		}
		/*
		 * get_nextpath() wants the offset to sit on, or before, a
		 * separator (which is '\0' by then), so step back by one.
		 * The prefix is non-empty here, so pos is at least 1.
		 */
		pos--;
	}

	/* Work on a private copy in which every '/' becomes a '\0'. */
	tokens = strdup(target);
	if (!tokens) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}
	for (p = tokens; p < tokens + total; p++) {
		if (*p == '/')
			*p = '\0';
	}

	/* Walk down from the prefix, replacing fd with each component. */
	fd = open(prefix_skip, O_RDONLY);
	while (fd >= 0) {
		component = get_nextpath(tokens, &pos, total);
		if (!component)
			break;

		nextfd = open_if_safe(fd, component);
		err = errno;
		close(fd);
		fd = nextfd;
		if (fd < 0) {
			/* close() may have clobbered errno, so put it back. */
			errno = err;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);
		}
	}

	free(tokens);
	return fd;
}
1597
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * The destination is opened with open_without_symlink() and the mount is
 * performed on /proc/self/fd/<fd>. A relative bind-mount source is opened
 * the same way.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int destfd, ret, saved_errno;
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	int srcfd = -1;
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("this is a relative bind mount");
		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		/* A return value equal to the buffer size already means truncation. */
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}
		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);
		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	/* Keep mount()'s errno across the close() calls for the log below. */
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);
	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		SYSERROR("Failed to mount %s onto %s", src ? src : "(null)", dest);
		return ret;
	}

	return 0;
}
1665
1666 /*
1667 * Mount a proc under @rootfs if proc self points to a pid other than
1668 * my own. This is needed to have a known-good proc mount for setting
1669 * up LSMs both at container startup and attach.
1670 *
1671 * @rootfs : the rootfs where proc should be mounted
1672 *
1673 * Returns < 0 on failure, 0 if the correct proc was already mounted
1674 * and 1 if a new proc was mounted.
1675 *
1676 * NOTE: not to be called from inside the container namespace!
1677 */
1678 int lxc_mount_proc_if_needed(const char *rootfs)
1679 {
1680 char path[MAXPATHLEN];
1681 int link_to_pid, linklen, mypid, ret;
1682 char link[LXC_NUMSTRLEN64] = {0};
1683
1684 ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
1685 if (ret < 0 || ret >= MAXPATHLEN) {
1686 SYSERROR("proc path name too long");
1687 return -1;
1688 }
1689
1690 linklen = readlink(path, link, LXC_NUMSTRLEN64);
1691
1692 ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
1693 if (ret < 0 || ret >= MAXPATHLEN) {
1694 SYSERROR("proc path name too long");
1695 return -1;
1696 }
1697
1698 /* /proc not mounted */
1699 if (linklen < 0) {
1700 if (mkdir(path, 0755) && errno != EEXIST)
1701 return -1;
1702 goto domount;
1703 } else if (linklen >= LXC_NUMSTRLEN64) {
1704 link[linklen - 1] = '\0';
1705 ERROR("readlink returned truncated content: \"%s\"", link);
1706 return -1;
1707 }
1708
1709 mypid = lxc_raw_getpid();
1710 INFO("I am %d, /proc/self points to \"%s\"", mypid, link);
1711
1712 if (lxc_safe_int(link, &link_to_pid) < 0)
1713 return -1;
1714
1715 /* correct procfs is already mounted */
1716 if (link_to_pid == mypid)
1717 return 0;
1718
1719 ret = umount2(path, MNT_DETACH);
1720 if (ret < 0)
1721 WARN("failed to umount \"%s\" with MNT_DETACH", path);
1722
1723 domount:
1724 /* rootfs is NULL */
1725 if (!strcmp(rootfs, ""))
1726 ret = mount("proc", path, "proc", 0, NULL);
1727 else
1728 ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);
1729 if (ret < 0)
1730 return -1;
1731
1732 INFO("mounted /proc in container for security transition");
1733 return 1;
1734 }
1735
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the fd, or the negative result of open() after logging the error.
 */
int open_devnull(void)
{
	int fd;

	fd = open("/dev/null", O_RDWR);
	if (fd >= 0)
		return fd;

	SYSERROR("Can't open /dev/null");
	return fd;
}
1745
/*
 * Duplicate @fd onto stdin, stdout and stderr, in that order.
 *
 * Returns 0 on success and -1 if @fd is negative or any dup2() fails.
 */
int set_stdfds(int fd)
{
	static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t i;

	if (fd < 0)
		return -1;

	for (i = 0; i < sizeof(std_fds) / sizeof(std_fds[0]); i++) {
		if (dup2(fd, std_fds[i]) < 0)
			return -1;
	}

	return 0;
}
1767
/*
 * Point stdin, stdout and stderr at /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null cannot be opened.
 */
int null_stdfds(void)
{
	int fd, ret;

	fd = open_devnull();
	if (fd < 0)
		return -1;

	ret = set_stdfds(fd);
	close(fd);

	return ret;
}
1780
/*
 * Count the lines of file @fn.
 *
 * Returns the number of lines read with getline(), or -1 if the file
 * cannot be opened.
 */
int lxc_count_file_lines(const char *fn)
{
	FILE *fp;
	char *buf = NULL;
	size_t cap = 0;
	int count;

	fp = fopen_cloexec(fn, "r");
	if (!fp)
		return -1;

	for (count = 0; getline(&buf, &cap, fp) != -1; count++)
		;

	free(buf);
	fclose(fp);

	return count;
}
1802
/*
 * Map @length bytes of @fd so that the mapping can be used as a C string.
 *
 * Returns the mapping, or MAP_FAILED if either mmap() call fails.
 */
void *lxc_strmmap(void *addr, size_t length, int prot, int flags, int fd,
		  off_t offset)
{
	void *anon, *filemap;

	/*
	 * Reserve an anonymous, zero-filled region that is one byte longer
	 * than the data we want to map.
	 */
	anon = mmap(addr, length + 1, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (anon == MAP_FAILED)
		return MAP_FAILED;

	/*
	 * Place the file mapping on top of the first @length bytes of that
	 * region. The extra byte behind it stays zero and terminates the
	 * data for the usual string functions.
	 */
	filemap = mmap(anon, length, prot, MAP_FIXED | flags, fd, offset);
	if (filemap != MAP_FAILED)
		return filemap;

	munmap(anon, length + 1);
	return MAP_FAILED;
}
1824
/*
 * Unmap a region of @length + 1 bytes starting at @addr, i.e. the size that
 * lxc_strmmap() reserves for a mapping of @length bytes.
 *
 * Returns the result of munmap().
 */
int lxc_strmunmap(void *addr, size_t length)
{
	size_t mapped = length + 1;

	return munmap(addr, mapped);
}
1829
1830 /* Check whether a signal is blocked by a process. */
1831 /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
1832 #define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
1833 bool task_blocking_signal(pid_t pid, int signal)
1834 {
1835 bool bret = false;
1836 char *line = NULL;
1837 long unsigned int sigblk = 0;
1838 size_t n = 0;
1839 int ret;
1840 FILE *f;
1841
1842 char status[__PROC_STATUS_LEN];
1843
1844 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1845 if (ret < 0 || ret >= __PROC_STATUS_LEN)
1846 return bret;
1847
1848 f = fopen(status, "r");
1849 if (!f)
1850 return bret;
1851
1852 while (getline(&line, &n, f) != -1) {
1853 if (strncmp(line, "SigBlk:\t", 8))
1854 continue;
1855
1856 if (sscanf(line + 8, "%lx", &sigblk) != 1)
1857 goto out;
1858 }
1859
1860 if (sigblk & (1LU << (signal - 1)))
1861 bret = true;
1862
1863 out:
1864 free(line);
1865 fclose(f);
1866 return bret;
1867 }
1868
/*
 * Grow the NULL-terminated pointer array *@list by one slot.
 *
 * On success *@list points to the (possibly moved) array, both the new slot
 * and the terminator behind it are NULL, and the index of the new slot is
 * returned. The array therefore stays properly terminated even if the
 * caller never fills the slot.
 *
 * Returns -1 if memory cannot be allocated; *@list is left untouched then.
 */
static int lxc_append_null_to_list(void ***list)
{
	int newentry = 0;
	void **tmp;

	/* Find the current terminator; a NULL list counts as empty. */
	if (*list)
		while ((*list)[newentry])
			newentry++;

	/* Room for the existing entries, the new slot and the terminator. */
	tmp = realloc(*list, (newentry + 2) * sizeof(*tmp));
	if (!tmp)
		return -1;

	*list = tmp;
	/* For a list that was NULL this slot would otherwise be uninitialised. */
	(*list)[newentry] = NULL;
	(*list)[newentry + 1] = NULL;

	return newentry;
}
1888
/*
 * Append a copy of @entry to the NULL-terminated string array *@list.
 *
 * The copy is made before the array is grown, so a failed allocation never
 * leaves the array modified.
 *
 * Returns 0 on success and -1 if memory cannot be allocated.
 */
int lxc_append_string(char ***list, char *entry)
{
	char *copy;
	int newentry;

	copy = strdup(entry);
	if (!copy)
		return -1;

	newentry = lxc_append_null_to_list((void ***)list);
	if (newentry < 0) {
		free(copy);
		return -1;
	}

	(*list)[newentry] = copy;

	return 0;
}
1906
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * When @ns is NULL or empty, the /proc/<pid>/ns directory itself is opened,
 * which lets callers probe whether the kernel supports namespaces at all.
 *
 * Returns the fd from open(). If the path does not fit into the buffer,
 * -EFBIG is returned and errno is set to EFBIG; errno is not touched on
 * any other path.
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
	int ret;
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char path[__NS_PATH_LEN];
	int have_ns = ns && ns[0] != '\0';

	ret = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid,
		       have_ns ? "/" : "", have_ns ? ns : "");
	if (ret < 0 || (size_t)ret >= __NS_PATH_LEN) {
		errno = EFBIG;
		return -EFBIG;
	}

	return open(path, O_RDONLY | O_CLOEXEC);
}
1927
/*
 * Convert the string @numstr to an unsigned int.
 *
 * Leading whitespace is skipped. The base is detected by strtoul() (base 0),
 * so "0x" and "0" prefixes are honoured. A leading '-' is rejected instead
 * of being wrapped around.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not entirely a number, and -ERANGE if the value does not fit.
 */
int lxc_safe_uint(const char *numstr, unsigned int *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* <ctype.h> functions must not be handed a negative char value. */
	while (isspace((unsigned char)*numstr))
		numstr++;

	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	if (uli > UINT_MAX)
		return -ERANGE;

	*converted = (unsigned int)uli;
	return 0;
}
1953
/*
 * Convert the string @numstr to an unsigned long.
 *
 * Leading whitespace is skipped. The base is detected by strtoul() (base 0).
 * A leading '-' is rejected instead of being wrapped around.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not entirely a number, and -ERANGE if the value does not fit.
 */
int lxc_safe_ulong(const char *numstr, unsigned long *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* <ctype.h> functions must not be handed a negative char value. */
	while (isspace((unsigned char)*numstr))
		numstr++;

	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	*converted = uli;
	return 0;
}
1976
/*
 * Convert the string @numstr to an int using strtol() with base 0.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not entirely a number, and -ERANGE if the value does not fit
 * into an int.
 */
int lxc_safe_int(const char *numstr, int *converted)
{
	char *end = NULL;
	long val;

	errno = 0;
	val = strtol(numstr, &end, 0);

	/* strtol() clamps to LONG_MIN/LONG_MAX when it overflows. */
	if (errno == ERANGE && (val == LONG_MIN || val == LONG_MAX))
		return -ERANGE;

	/* Any other failure, no digits at all, or trailing characters. */
	if ((val == 0 && errno != 0) || end == numstr || *end != '\0')
		return -EINVAL;

	if (val < INT_MIN || val > INT_MAX)
		return -ERANGE;

	*converted = (int)val;
	return 0;
}
1999
/*
 * Convert the string @numstr to a long using strtol() with base 0.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not entirely a number, and -ERANGE if strtol() overflowed.
 */
int lxc_safe_long(const char *numstr, long int *converted)
{
	char *end = NULL;
	long val;

	errno = 0;
	val = strtol(numstr, &end, 0);

	/* strtol() clamps to LONG_MIN/LONG_MAX when it overflows. */
	if (errno == ERANGE && (val == LONG_MIN || val == LONG_MAX))
		return -ERANGE;

	/* Any other failure, no digits at all, or trailing characters. */
	if ((val == 0 && errno != 0) || end == numstr || *end != '\0')
		return -EINVAL;

	*converted = val;
	return 0;
}
2019
/*
 * Convert the string @numstr to a long long using strtoll() with base 0.
 *
 * Returns 0 and stores the value in @converted on success, -EINVAL if the
 * string is not entirely a number, and -ERANGE if strtoll() overflowed.
 */
int lxc_safe_long_long(const char *numstr, long long int *converted)
{
	char *end = NULL;
	long long val;

	errno = 0;
	val = strtoll(numstr, &end, 0);

	/* strtoll() clamps to LLONG_MIN/LLONG_MAX when it overflows. */
	if (errno == ERANGE && (val == LLONG_MIN || val == LLONG_MAX))
		return -ERANGE;

	/* Any other failure, no digits at all, or trailing characters. */
	if ((val == 0 && errno != 0) || end == numstr || *end != '\0')
		return -EINVAL;

	*converted = val;
	return 0;
}
2039
/*
 * Switch to group @gid and then to user @uid, logging each step.
 *
 * Returns 0 on success, or the negated errno of the setgid()/setuid() call
 * that failed.
 */
int lxc_switch_uid_gid(uid_t uid, gid_t gid)
{
	int saved_errno;

	if (setgid(gid) < 0) {
		/*
		 * Capture errno before logging: the logging macro may call
		 * functions that change it.
		 */
		saved_errno = errno;
		SYSERROR("Failed to switch to gid %d.", (int)gid);
		return -saved_errno;
	}
	NOTICE("Switched to gid %d.", (int)gid);

	if (setuid(uid) < 0) {
		saved_errno = errno;
		SYSERROR("Failed to switch to uid %d.", (int)uid);
		return -saved_errno;
	}
	NOTICE("Switched to uid %d.", (int)uid);

	return 0;
}
2056
/*
 * Simple convenience function which enables uniform logging.
 *
 * Calls setgroups(@size, @list).
 *
 * Returns 0 on success, or the negated errno of the failed setgroups() call.
 */
int lxc_setgroups(int size, gid_t list[])
{
	if (setgroups(size, list) < 0) {
		/*
		 * Capture errno before logging: the logging macro may call
		 * functions that change it.
		 */
		int saved_errno = errno;

		SYSERROR("Failed to setgroups().");
		return -saved_errno;
	}
	NOTICE("Dropped additional groups.");

	return 0;
}
2068
/*
 * Find an unused loop device by scanning /dev for entries whose name starts
 * with "loop".
 *
 * @loop_name: buffer of at least LO_NAME_SIZE bytes that receives the path
 *             of the chosen device.
 *
 * A device counts as unused only if LOOP_GET_STATUS64 fails with ENXIO. A
 * device that answers the status query has a backing file and is skipped,
 * as is anything that fails with a different error (for example
 * /dev/loop-control).
 *
 * Returns an open read/write fd for the device, or -1 if none was found.
 */
static int lxc_get_unused_loop_dev_legacy(char *loop_name)
{
	struct dirent *dp;
	struct loop_info64 lo64;
	DIR *dir;
	int dfd, fd = -1, ret;

	dir = opendir("/dev");
	if (!dir)
		return -1;

	/* The directory fd does not change between iterations. */
	dfd = dirfd(dir);
	if (dfd < 0) {
		closedir(dir);
		return -1;
	}

	while ((dp = readdir(dir))) {
		if (strncmp(dp->d_name, "loop", 4) != 0)
			continue;

		fd = openat(dfd, dp->d_name, O_RDWR | O_CLOEXEC);
		if (fd < 0)
			continue;

		ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
		if (ret == 0 || errno != ENXIO) {
			/* In use, or not a usable loop device at all. */
			close(fd);
			fd = -1;
			continue;
		}

		ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
		if (ret < 0 || ret >= LO_NAME_SIZE) {
			close(fd);
			fd = -1;
			continue;
		}

		break;
	}

	closedir(dir);

	if (fd < 0)
		return -1;

	return fd;
}
2122
/*
 * Ask /dev/loop-control for a free loop device and open it.
 *
 * @name_loop: buffer of at least LO_NAME_SIZE bytes that receives the path
 *             "/dev/loop<N>".
 *
 * Returns an open read/write fd for the device, -ENODEV if
 * /dev/loop-control cannot be opened, and -1 on any other failure.
 */
static int lxc_get_unused_loop_dev(char *name_loop)
{
	int ctl_fd, nr, len;
	int dev_fd = -1;

	ctl_fd = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
	if (ctl_fd < 0)
		return -ENODEV;

	nr = ioctl(ctl_fd, LOOP_CTL_GET_FREE);
	if (nr >= 0) {
		len = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", nr);
		if (len >= 0 && len < LO_NAME_SIZE)
			dev_fd = open(name_loop, O_RDWR | O_CLOEXEC);
	}

	close(ctl_fd);
	return dev_fd;
}
2148
/*
 * Attach the image file @source to a free loop device.
 *
 * @source:   path of the backing file; opened read/write.
 * @loop_dev: buffer of at least LO_NAME_SIZE bytes that receives the path of
 *            the loop device.
 * @flags:    value stored in lo_flags of the device status.
 *
 * The device is taken from /dev/loop-control when that is available and
 * from a scan of /dev otherwise.
 *
 * Returns an open fd for the configured loop device, or a negative value on
 * failure. If the image was attached but configuring the device failed, it
 * is detached again.
 */
int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
{
	int ret, saved_errno;
	struct loop_info64 lo64;
	int fd_img = -1, fret = -1, fd_loop = -1;

	fd_loop = lxc_get_unused_loop_dev(loop_dev);
	if (fd_loop < 0) {
		/* Only a missing control device justifies the legacy scan. */
		if (fd_loop != -ENODEV)
			goto on_error;

		fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev);
		if (fd_loop < 0)
			goto on_error;
	}

	fd_img = open(source, O_RDWR | O_CLOEXEC);
	if (fd_img < 0)
		goto on_error;

	ret = ioctl(fd_loop, LOOP_SET_FD, fd_img);
	if (ret < 0)
		goto on_error;

	memset(&lo64, 0, sizeof(lo64));
	lo64.lo_flags = flags;

	ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64);
	if (ret < 0) {
		/*
		 * The image is attached at this point. Detach it so the
		 * device is not left bound, keeping the original errno.
		 */
		saved_errno = errno;
		(void)ioctl(fd_loop, LOOP_CLR_FD, 0);
		errno = saved_errno;
		goto on_error;
	}

	fret = 0;

on_error:
	if (fd_img >= 0)
		close(fd_img);

	if (fret < 0 && fd_loop >= 0) {
		close(fd_loop);
		fd_loop = -1;
	}

	return fd_loop;
}
2191
/*
 * Unmount everything that is stacked on @path.
 *
 * @lazy: use MNT_DETACH for each umount2() call.
 *
 * umount2() is repeated until it fails. A failure with EINVAL is taken to
 * mean that nothing is mounted on @path any more; any other error is
 * treated as fatal so that the loop cannot run forever. (The alternative
 * would be to keep parsing /proc/self/mountinfo, which is unpleasant and
 * probably racy.)
 *
 * Returns the number of successful unmounts, capped at INT_MAX, or the
 * negated errno of the fatal failure.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	int flags = lazy ? MNT_DETACH : 0;
	int umounts = 0;

	while (umount2(path, flags) >= 0) {
		/* Stop counting rather than overflow on an absurd stack. */
		if (umounts != INT_MAX)
			umounts++;
	}

	if (errno != EINVAL)
		return -errno;

	return umounts;
}
2222
/*
 * Run @child_fn(@args) in a child process and capture what it prints.
 *
 * @buf:      optional buffer that receives the first chunk read from the
 *            child's stdout/stderr, NUL-terminated.
 * @buf_size: size of @buf.
 * @child_fn: function executed in the child; it is expected not to return.
 * @args:     opaque argument passed to @child_fn.
 *
 * A single trailing newline is removed from the captured output. Output
 * that does not end in a newline is kept in full.
 *
 * Returns the result of wait_for_pid() for the child, or -1 if the pipe or
 * the child process could not be created.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	pid_t child;
	int ret, fret, pipefd[2];
	ssize_t bytes;

	/* Make sure our callers do not receive uninitialized memory. */
	if (buf_size > 0 && buf)
		buf[0] = '\0';

	if (pipe(pipefd) < 0) {
		SYSERROR("failed to create pipe");
		return -1;
	}

	child = lxc_raw_clone(0);
	if (child < 0) {
		close(pipefd[0]);
		close(pipefd[1]);
		SYSERROR("failed to create new process");
		return -1;
	}

	if (child == 0) {
		/* Close the read-end of the pipe. */
		close(pipefd[0]);

		/* Redirect std{err,out} to write-end of the
		 * pipe.
		 */
		ret = dup2(pipefd[1], STDOUT_FILENO);
		if (ret >= 0)
			ret = dup2(pipefd[1], STDERR_FILENO);

		/* Close the write-end of the pipe. */
		close(pipefd[1]);

		if (ret < 0) {
			SYSERROR("failed to duplicate std{err,out} file descriptor");
			exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("failed to exec command");
		exit(EXIT_FAILURE);
	}

	/* close the write-end of the pipe */
	close(pipefd[1]);

	if (buf && buf_size > 0) {
		/* An interrupted read is not a reason to lose the output. */
		do {
			bytes = read(pipefd[0], buf, buf_size - 1);
		} while (bytes < 0 && errno == EINTR);

		if (bytes > 0) {
			/* Drop only a trailing newline, never payload. */
			if (buf[bytes - 1] == '\n')
				bytes--;
			buf[bytes] = '\0';
		}
	}

	fret = wait_for_pid(child);
	/* close the read-end of the pipe */
	close(pipefd[0]);

	return fret;
}
2286
/*
 * Join @first and all following arguments into one path.
 *
 * The argument list must end with a NULL pointer. A '/' is inserted in
 * front of every component that does not start with one itself.
 *
 * Returns a heap-allocated string the caller must free(). The buffer comes
 * from must_copy_string() and must_realloc().
 */
char *must_make_path(const char *first, ...)
{
	va_list ap;
	const char *part;
	char *path;
	size_t len = strlen(first);

	path = must_copy_string(first);

	va_start(ap, first);
	for (part = va_arg(ap, char *); part != NULL; part = va_arg(ap, char *)) {
		size_t part_len = strlen(part);
		int need_sep = part[0] != '/';

		path = must_realloc(path, len + part_len + (need_sep ? 1 : 0) + 1);
		if (need_sep)
			path[len++] = '/';

		/* Copies the terminating NUL byte as well. */
		memcpy(path + len, part, part_len + 1);
		len += part_len;
	}
	va_end(ap);

	return path;
}
2309
/*
 * Duplicate @entry, retrying strdup() until it succeeds.
 *
 * Returns NULL only when @entry is NULL; otherwise a heap-allocated copy
 * the caller must free().
 */
char *must_copy_string(const char *entry)
{
	char *copy = NULL;

	if (!entry)
		return NULL;

	while (!copy)
		copy = strdup(entry);

	return copy;
}
2322
/*
 * Resize @orig to @sz bytes, retrying realloc() until it succeeds.
 *
 * A request for 0 bytes is served as a request for 1 byte. realloc(ptr, 0)
 * is allowed to free the block and return NULL, and retrying with the same,
 * now freed, pointer would be a double free.
 *
 * Never returns NULL.
 */
void *must_realloc(void *orig, size_t sz)
{
	void *ret;

	if (sz == 0)
		sz = 1;

	do {
		ret = realloc(orig, sz);
	} while (!ret);

	return ret;
}
2333
2334 bool is_fs_type(const struct statfs *fs, fs_type_magic magic_val)
2335 {
2336 return (fs->f_type == (fs_type_magic)magic_val);
2337 }
2338
2339 bool has_fs_type(const char *path, fs_type_magic magic_val)
2340 {
2341 bool has_type;
2342 int ret;
2343 struct statfs sb;
2344
2345 ret = statfs(path, &sb);
2346 if (ret < 0)
2347 return false;
2348
2349 has_type = is_fs_type(&sb, magic_val);
2350 if (!has_type && magic_val == RAMFS_MAGIC)
2351 WARN("When the ramfs it a tmpfs statfs() might report tmpfs");
2352
2353 return has_type;
2354 }
2355
/*
 * Return true if the network interface @nic has an entry below
 * /sys/class/net.
 *
 * The name "none" is always reported as existing. A name too long for the
 * path buffer is reported as missing.
 */
bool lxc_nic_exists(char *nic)
{
#define __LXC_SYS_CLASS_NET_LEN 15 + IFNAMSIZ + 1
	char path[__LXC_SYS_CLASS_NET_LEN];
	struct stat sb;
	int len;

	if (strcmp(nic, "none") == 0)
		return true;

	len = snprintf(path, __LXC_SYS_CLASS_NET_LEN, "/sys/class/net/%s", nic);
	if (len < 0 || (size_t)len >= __LXC_SYS_CLASS_NET_LEN)
		return false;

	return stat(path, &sb) >= 0;
}
2376
/*
 * Create a temporary file from the mkstemp() pattern @template.
 *
 * @template: pattern ending in "XXXXXX"; overwritten with the actual name.
 * @rm:       unlink the file right after creating it, so that only the fd
 *            keeps it alive.
 *
 * Returns the open fd, or -1 if the file cannot be created or unlinked.
 */
int lxc_make_tmpfile(char *template, bool rm)
{
	int fd;

	fd = mkstemp(template);
	if (fd < 0)
		return -1;

	if (rm && unlink(template) < 0) {
		close(fd);
		return -1;
	}

	return fd;
}
2396
2397 int parse_byte_size_string(const char *s, int64_t *converted)
2398 {
2399 int ret, suffix_len;
2400 long long int conv;
2401 int64_t mltpl, overflow;
2402 char *end;
2403 char dup[LXC_NUMSTRLEN64 + 2];
2404 char suffix[3] = {0};
2405
2406 if (!s || !strcmp(s, ""))
2407 return -EINVAL;
2408
2409 end = stpncpy(dup, s, sizeof(dup));
2410 if (*end != '\0')
2411 return -EINVAL;
2412
2413 if (isdigit(*(end - 1)))
2414 suffix_len = 0;
2415 else if (isalpha(*(end - 1)))
2416 suffix_len = 1;
2417 else
2418 return -EINVAL;
2419
2420 if (suffix_len > 0 && (end - 2) == dup && !isdigit(*(end - 2)))
2421 return -EINVAL;
2422
2423 if (suffix_len > 0 && isalpha(*(end - 2)))
2424 suffix_len++;
2425
2426 if (suffix_len > 0) {
2427 memcpy(suffix, end - suffix_len, suffix_len);
2428 *(suffix + suffix_len) = '\0';
2429 *(end - suffix_len) = '\0';
2430 }
2431 dup[lxc_char_right_gc(dup, strlen(dup))] = '\0';
2432
2433 ret = lxc_safe_long_long(dup, &conv);
2434 if (ret < 0)
2435 return -ret;
2436
2437 if (suffix_len != 2) {
2438 *converted = conv;
2439 return 0;
2440 }
2441
2442 if (!strcmp(suffix, "kB"))
2443 mltpl = 1024;
2444 else if (!strcmp(suffix, "MB"))
2445 mltpl = 1024 * 1024;
2446 else if (!strcmp(suffix, "GB"))
2447 mltpl = 1024 * 1024 * 1024;
2448 else
2449 return -EINVAL;
2450
2451 overflow = conv * mltpl;
2452 if (conv != 0 && (overflow / conv) != mltpl)
2453 return -ERANGE;
2454
2455 *converted = overflow;
2456 return 0;
2457 }
2458
/*
 * Round @n up to the next power of two.
 *
 * A value that already is a power of two is returned unchanged. 0 is not
 * valid input and yields 0, which is not a valid power of two. The result
 * is also 0 when the next power of two does not fit into 64 bits.
 */
uint64_t lxc_find_next_power2(uint64_t n)
{
	uint64_t v;

	if (n == 0)
		return 0;

	/*
	 * Copy the highest set bit of n - 1 into every lower position, which
	 * gives 2^k - 1, then add one to reach 2^k.
	 */
	v = n - 1;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
	v |= v >> 32;

	return v + 1;
}