]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
commands: add lxc_cmd_state_server()
[mirror_lxc.git] / src / lxc / utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config.h"
25
26 #include <ctype.h>
27 #include <dirent.h>
28 #include <errno.h>
29 #include <fcntl.h>
30 #include <grp.h>
31 #include <libgen.h>
32 #include <stddef.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <sys/mman.h>
38 #include <sys/mount.h>
39 #include <sys/param.h>
40 #include <sys/prctl.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/vfs.h>
44 #include <sys/wait.h>
45
46 #include "log.h"
47 #include "lxclock.h"
48 #include "namespace.h"
49 #include "utils.h"
50
51 #ifndef PR_SET_MM
52 #define PR_SET_MM 35
53 #endif
54
/*
 * Fallback for build environments whose headers do not define
 * PR_SET_MM_MAP: provide the constant and the structure locally.
 */
#ifndef PR_SET_MM_MAP
#define PR_SET_MM_MAP 14

/*
 * NOTE(review): presumably this has to match the kernel's own
 * struct prctl_mm_map field for field - confirm against <linux/prctl.h>.
 */
struct prctl_mm_map {
	uint64_t start_code;
	uint64_t end_code;
	uint64_t start_data;
	uint64_t end_data;
	uint64_t start_brk;
	uint64_t brk;
	uint64_t start_stack;
	uint64_t arg_start;
	uint64_t arg_end;
	uint64_t env_start;
	uint64_t env_end;
	uint64_t *auxv;
	uint32_t auxv_size;
	uint32_t exe_fd;
};
#endif
75
76 #ifndef O_PATH
77 #define O_PATH 010000000
78 #endif
79
80 #ifndef O_NOFOLLOW
81 #define O_NOFOLLOW 00400000
82 #endif
83
84 lxc_log_define(lxc_utils, lxc);
85
86 /*
87 * if path is btrfs, tries to remove it and any subvolumes beneath it
88 */
89 extern bool btrfs_try_remove_subvol(const char *path);
90
/*
 * Remove the contents of @dirname and then @dirname itself.
 *
 * @pdev and @onedev: when @onedev is set, entries whose st_dev differs
 * from @pdev are not descended into; only a btrfs subvolume removal is
 * attempted on them.
 * @exclude and @level: at @level 0 an entry named @exclude is only removed
 * if a plain rmdir()/unlink() succeeds; a non-empty excluded directory is
 * kept and the resulting failure to remove @dirname is tolerated.
 *
 * Returns 0 if everything went fine, -1 if any step failed.
 */
static int _recursive_rmdir(char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	char entry_path[MAXPATHLEN];
	struct dirent *entry;
	DIR *dirp;
	bool kept_excluded = false;
	int rv, had_error = 0;

	dirp = opendir(dirname);
	if (dirp == NULL) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	for (entry = readdir(dirp); entry != NULL; entry = readdir(dirp)) {
		struct stat sb;
		int n;

		if (strcmp(entry->d_name, ".") == 0 ||
		    strcmp(entry->d_name, "..") == 0)
			continue;

		n = snprintf(entry_path, MAXPATHLEN, "%s/%s", dirname, entry->d_name);
		if (n < 0 || n >= MAXPATHLEN) {
			ERROR("pathname too long");
			had_error = 1;
			continue;
		}

		/* The excluded top-level entry only gets a non-recursive removal. */
		if (level == 0 && exclude != NULL && strcmp(entry->d_name, exclude) == 0) {
			rv = rmdir(entry_path);
			if (rv < 0) {
				if (errno == ENOTEMPTY) {
					INFO("Not deleting snapshot %s", entry_path);
					kept_excluded = true;
				} else if (errno == ENOTDIR) {
					rv = unlink(entry_path);
					if (rv)
						INFO("%s: failed to remove %s", __func__, entry_path);
				} else {
					SYSERROR("%s: failed to rmdir %s", __func__, entry_path);
					had_error = 1;
				}
			}
			continue;
		}

		rv = lstat(entry_path, &sb);
		if (rv) {
			ERROR("%s: failed to stat %s", __func__, entry_path);
			had_error = 1;
			continue;
		}

		if (onedev && sb.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(entry_path))
				INFO("Removed btrfs subvolume at %s\n", entry_path);
			continue;
		}

		if (!S_ISDIR(sb.st_mode)) {
			if (unlink(entry_path) < 0) {
				SYSERROR("%s: failed to delete %s", __func__, entry_path);
				had_error = 1;
			}
			continue;
		}

		if (_recursive_rmdir(entry_path, pdev, exclude, level + 1, onedev) < 0)
			had_error = 1;
	}

	/* A kept excluded entry legitimately leaves the directory non-empty. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !kept_excluded) {
		ERROR("%s: failed to delete %s", __func__, dirname);
		had_error = 1;
	}

	rv = closedir(dirp);
	if (rv) {
		ERROR("%s: failed to close directory %s", __func__, dirname);
		had_error = 1;
	}

	return had_error ? -1 : 0;
}
183
/* overlayfs has been seen with two different superblock magic values */
#define OVERLAYFS_SUPER_MAGIC 0x794c764f
#define OVERLAY_SUPER_MAGIC 0x794c7630
/*
 * Report whether @path lives on an overlayfs mount. st_dev is unreliable
 * there, so callers use this to skip the one-device restriction.
 * A failing statfs() is reported as "not overlayfs".
 */
static bool is_native_overlayfs(const char *path)
{
	struct statfs fs_info;

	if (statfs(path, &fs_info) < 0)
		return false;

	return fs_info.f_type == OVERLAYFS_SUPER_MAGIC ||
	       fs_info.f_type == OVERLAY_SUPER_MAGIC;
}
202
/*
 * Recursively remove @path, staying on the device @path itself lives on
 * unless @path is on overlayfs. @exclude is handed through to the
 * recursive helper. A @path that does not exist counts as success.
 *
 * Returns 0 on success, -1 if there were any failures.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat sb;
	bool stay_on_dev = !is_native_overlayfs(path);

	if (lstat(path, &sb) >= 0)
		return _recursive_rmdir(path, sb.st_dev, exclude, 0, stay_on_dev);

	if (errno == ENOENT)
		return 0;

	ERROR("%s: failed to stat %s", __func__, path);
	return -1;
}
222
/*
 * Parse @arg as an unsigned 16 bit number in the given @base and store it
 * in @val. The whole string must be consumed by the conversion.
 * (borrowed from iproute2)
 *
 * Returns 0 on success, -1 on empty input, trailing garbage, a conversion
 * error or a value above 0xFFFF; @val is left untouched on failure.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);

	if (end == NULL || end == arg || *end != '\0')
		return -1;
	if (parsed > 0xFFFF || errno != 0)
		return -1;

	*val = parsed;
	return 0;
}
241
/*
 * Create @dir and every missing parent directory, each with @mode.
 * Directories that already exist are not an error.
 *
 * Returns 0 on success, -1 if a directory could not be created or memory
 * for the intermediate path could not be allocated.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* advance to the start of the next component ... */
		dir = tmp + strspn(tmp, "/");
		/* ... and to the separator (or end) following it */
		tmp = dir + strcspn(dir, "/");

		/* everything that precedes the current component */
		makeme = strndup(orig, dir - orig);
		if (!makeme) {
			ERROR("failed to allocate memory");
			return -1;
		}

		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
264
265 char *get_rundir()
266 {
267 char *rundir;
268 const char *homedir;
269
270 if (geteuid() == 0) {
271 rundir = strdup(RUNTIME_PATH);
272 return rundir;
273 }
274
275 rundir = getenv("XDG_RUNTIME_DIR");
276 if (rundir) {
277 rundir = strdup(rundir);
278 return rundir;
279 }
280
281 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
282 homedir = getenv("HOME");
283 if (!homedir) {
284 ERROR("HOME isn't set in the environment.");
285 return NULL;
286 }
287
288 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
289 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
290
291 return rundir;
292 }
293
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted
 * by a signal.
 *
 * Returns 0 if the child exited normally with status 0, -1 if waitpid()
 * failed or the child did not exit cleanly.
 */
int wait_for_pid(pid_t pid)
{
	int wstatus;

	for (;;) {
		int reaped = waitpid(pid, &wstatus, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}
		if (reaped == pid)
			break;
	}

	return (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) ? 0 : -1;
}
311
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted
 * by a signal.
 *
 * Returns the raw wait status as filled in by waitpid(), or -1 if
 * waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int wstatus;

	for (;;) {
		int reaped = waitpid(pid, &wstatus, 0);

		if (reaped == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}
		if (reaped == pid)
			return wstatus;
	}
}
327
/*
 * write() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final write() returned.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
337
/*
 * read() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final read() returned.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t got;

	do {
		got = read(fd, buf, count);
	} while (got < 0 && errno == EINTR);

	return got;
}
347
/*
 * Read exactly @count bytes from @fd into @buf and, when @expected_buf is
 * given, verify that the bytes read are equal to it.
 *
 * Returns the byte count on success. A read result <= 0 is passed through
 * unchanged. A short read returns -1; a content mismatch returns -1 with
 * errno set to EINVAL.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t got = lxc_read_nointr(fd, buf, count);

	if (got > 0) {
		if ((size_t)got != count)
			return -1;

		if (expected_buf != NULL && memcmp(buf, expected_buf, count) != 0) {
			errno = EINVAL;
			return -1;
		}
	}

	return got;
}
362
363 #if HAVE_LIBGNUTLS
364 #include <gnutls/gnutls.h>
365 #include <gnutls/crypto.h>
366
367 __attribute__((constructor))
368 static void gnutls_lxc_init(void)
369 {
370 gnutls_global_init();
371 }
372
373 int sha1sum_file(char *fnam, unsigned char *digest)
374 {
375 char *buf;
376 int ret;
377 FILE *f;
378 long flen;
379
380 if (!fnam)
381 return -1;
382 f = fopen_cloexec(fnam, "r");
383 if (!f) {
384 SYSERROR("Error opening template");
385 return -1;
386 }
387 if (fseek(f, 0, SEEK_END) < 0) {
388 SYSERROR("Error seeking to end of template");
389 fclose(f);
390 return -1;
391 }
392 if ((flen = ftell(f)) < 0) {
393 SYSERROR("Error telling size of template");
394 fclose(f);
395 return -1;
396 }
397 if (fseek(f, 0, SEEK_SET) < 0) {
398 SYSERROR("Error seeking to start of template");
399 fclose(f);
400 return -1;
401 }
402 if ((buf = malloc(flen+1)) == NULL) {
403 SYSERROR("Out of memory");
404 fclose(f);
405 return -1;
406 }
407 if (fread(buf, 1, flen, f) != flen) {
408 SYSERROR("Failure reading template");
409 free(buf);
410 fclose(f);
411 return -1;
412 }
413 if (fclose(f) < 0) {
414 SYSERROR("Failre closing template");
415 free(buf);
416 return -1;
417 }
418 buf[flen] = '\0';
419 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
420 free(buf);
421 return ret;
422 }
423 #endif
424
/*
 * Turn a NULL-terminated list of char * varargs into a NULL-terminated
 * argv style array.
 *
 * @skip      number of leading slots left NULL for the caller to fill in
 * @do_strdup when non-zero every argument is copied with strdup(),
 *            otherwise the array stores the argument pointers themselves
 *
 * Returns the newly allocated array or NULL when out of memory. On
 * failure nothing is leaked: copies made so far are released as well.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	char **result;

	/* first determine size of argument list, we don't want to reallocate
	 * constantly...
	 */
	va_copy(ap2, ap);
	while (va_arg(ap2, char *))
		count++;
	va_end(ap2);

	result = calloc(count, sizeof(char*));
	if (!result)
		return NULL;

	count = skip;
	for (;;) {
		char *arg = va_arg(ap, char *);

		if (!arg)
			break;

		if (do_strdup) {
			arg = strdup(arg);
			if (!arg)
				goto oom;
		}
		result[count++] = arg;
	}

	/* calloc has already set last element to NULL*/
	return result;

oom:
	/* This label is only reached when do_strdup is set, so every slot
	 * from skip up to count holds a copy that we own.
	 */
	while (count > skip)
		free(result[--count]);
	free(result);
	return NULL;
}
464
/*
 * Same as lxc_va_arg_list_to_argv() without copying the arguments; the
 * resulting array is handed back as an array of const strings.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	char **argv = lxc_va_arg_list_to_argv(ap, skip, 0);

	return (const char **)argv;
}
469
470 extern struct lxc_popen_FILE *lxc_popen(const char *command)
471 {
472 struct lxc_popen_FILE *fp = NULL;
473 int parent_end = -1, child_end = -1;
474 int pipe_fds[2];
475 pid_t child_pid;
476
477 int r = pipe2(pipe_fds, O_CLOEXEC);
478
479 if (r < 0) {
480 ERROR("pipe2 failure");
481 return NULL;
482 }
483
484 parent_end = pipe_fds[0];
485 child_end = pipe_fds[1];
486
487 child_pid = fork();
488
489 if (child_pid == 0) {
490 /* child */
491 int child_std_end = STDOUT_FILENO;
492
493 if (child_end != child_std_end) {
494 /* dup2() doesn't dup close-on-exec flag */
495 dup2(child_end, child_std_end);
496
497 /* it's safe not to close child_end here
498 * as it's marked close-on-exec anyway
499 */
500 } else {
501 /*
502 * The descriptor is already the one we will use.
503 * But it must not be marked close-on-exec.
504 * Undo the effects.
505 */
506 if (fcntl(child_end, F_SETFD, 0) != 0) {
507 SYSERROR("Failed to remove FD_CLOEXEC from fd.");
508 exit(127);
509 }
510 }
511
512 /*
513 * Unblock signals.
514 * This is the main/only reason
515 * why we do our lousy popen() emulation.
516 */
517 {
518 sigset_t mask;
519 sigfillset(&mask);
520 sigprocmask(SIG_UNBLOCK, &mask, NULL);
521 }
522
523 execl("/bin/sh", "sh", "-c", command, (char *) NULL);
524 exit(127);
525 }
526
527 /* parent */
528
529 close(child_end);
530 child_end = -1;
531
532 if (child_pid < 0) {
533 ERROR("fork failure");
534 goto error;
535 }
536
537 fp = calloc(1, sizeof(*fp));
538 if (!fp) {
539 ERROR("failed to allocate memory");
540 goto error;
541 }
542
543 fp->f = fdopen(parent_end, "r");
544 if (!fp->f) {
545 ERROR("fdopen failure");
546 goto error;
547 }
548
549 fp->child_pid = child_pid;
550
551 return fp;
552
553 error:
554
555 if (fp) {
556 if (fp->f) {
557 fclose(fp->f);
558 parent_end = -1; /* so we do not close it second time */
559 }
560
561 free(fp);
562 }
563
564 if (parent_end != -1)
565 close(parent_end);
566
567 return NULL;
568 }
569
570 extern int lxc_pclose(struct lxc_popen_FILE *fp)
571 {
572 FILE *f = NULL;
573 pid_t child_pid = 0;
574 int wstatus = 0;
575 pid_t wait_pid;
576
577 if (fp) {
578 f = fp->f;
579 child_pid = fp->child_pid;
580 /* free memory (we still need to close file stream) */
581 free(fp);
582 fp = NULL;
583 }
584
585 if (!f || fclose(f)) {
586 ERROR("fclose failure");
587 return -1;
588 }
589
590 do {
591 wait_pid = waitpid(child_pid, &wstatus, 0);
592 } while (wait_pid == -1 && errno == EINTR);
593
594 if (wait_pid == -1) {
595 ERROR("waitpid failure");
596 return -1;
597 }
598
599 return wstatus;
600 }
601
/*
 * Return a newly allocated copy of @haystack in which every
 * non-overlapping occurrence of @needle (searched left to right) is
 * replaced by @replacement.
 *
 * An empty @needle matches nothing: the result is then a plain copy of
 * @haystack (searching for it used to loop forever).
 *
 * Returns NULL if memory could not be allocated. The caller frees the
 * result.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t result_len = strlen(haystack);
	const char *cur, *hit;
	char *result, *out;

	/* first pass: work out the length of the result */
	if (needle_len > 0) {
		for (cur = haystack; (hit = strstr(cur, needle)); cur = hit + needle_len)
			result_len = result_len - needle_len + replacement_len;
	}

	result = calloc(1, result_len + 1);
	if (!result)
		return NULL;

	/* second pass: copy the pieces between the matches */
	out = result;
	cur = haystack;
	if (needle_len > 0) {
		while ((hit = strstr(cur, needle))) {
			size_t part_len = (size_t)(hit - cur);

			memcpy(out, cur, part_len);
			out += part_len;
			memcpy(out, replacement, replacement_len);
			out += replacement_len;
			cur = hit + needle_len;
		}
	}

	/* tail after the last match; calloc() supplied the terminating NUL */
	memcpy(out, cur, strlen(cur));

	return result;
}
656
/*
 * Check whether @needle equals one of the strings in the NULL-terminated
 * array @haystack. A NULL @haystack contains nothing.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	size_t i;

	if (haystack == NULL)
		return false;

	for (i = 0; haystack[i] != NULL; i++) {
		if (strcmp(needle, haystack[i]) == 0)
			return true;
	}

	return false;
}
664
/*
 * Concatenate the strings of the NULL-terminated array @parts, placing
 * @sep between consecutive elements. With @use_as_prefix set, @sep is
 * additionally put in front of the result.
 *
 * Returns a newly allocated string, or NULL when out of memory.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	size_t sep_len = strlen(sep);
	size_t total = use_as_prefix ? sep_len : 0;
	size_t i;
	char *joined, *cursor;

	/* work out how long the joined string is going to be */
	for (i = 0; parts[i] != NULL; i++) {
		if (i > 0)
			total += sep_len;
		total += strlen(parts[i]);
	}

	joined = calloc(total + 1, 1);
	if (joined == NULL)
		return NULL;

	cursor = joined;
	if (use_as_prefix) {
		memcpy(cursor, sep, sep_len);
		cursor += sep_len;
	}

	for (i = 0; parts[i] != NULL; i++) {
		size_t part_len = strlen(parts[i]);

		if (i > 0) {
			memcpy(cursor, sep, sep_len);
			cursor += sep_len;
		}
		memcpy(cursor, parts[i], part_len);
		cursor += part_len;
	}

	/* the terminating NUL comes from calloc() */
	return joined;
}
690
/*
 * Split @path at '/' and resolve "." and ".." purely lexically:
 * a "." component is dropped, a ".." component removes itself and the
 * component before it, and a ".." with nothing before it is dropped.
 *
 * Returns the NULL-terminated array of remaining components, or NULL if
 * the split failed. The caller frees the array and its elements.
 */
char **lxc_normalize_path(const char *path)
{
	char **parts;
	size_t nparts = 0;
	size_t idx = 0;

	parts = lxc_string_split(path, '/');
	if (parts == NULL)
		return NULL;

	while (parts[nparts] != NULL)
		nparts++;

	while (idx < nparts) {
		bool is_dot = strcmp(parts[idx], ".") == 0;
		bool is_dotdot = strcmp(parts[idx], "..") == 0;

		if (is_dot || (is_dotdot && idx == 0)) {
			/* eat this element; the move includes the NULL terminator */
			free(parts[idx]);
			memmove(&parts[idx], &parts[idx + 1], sizeof(char *) * (nparts - idx));
			nparts--;
			continue;
		}

		if (is_dotdot) {
			/* eat this and the previous element */
			free(parts[idx - 1]);
			free(parts[idx]);
			memmove(&parts[idx - 1], &parts[idx + 1], sizeof(char *) * (nparts - idx));
			nparts -= 2;
			idx--;
			continue;
		}

		idx++;
	}

	return parts;
}
725
/*
 * Replace *@path by a normalized version of itself: components are
 * resolved by lxc_normalize_path() and joined with single '/' characters,
 * keeping a leading '/' if the original had one. The old string is freed
 * when it gets replaced.
 *
 * Returns true on success (an empty path is left as it is), false if
 * normalizing the path or allocating the new string failed.
 */
bool lxc_deslashify(char **path)
{
	char **parts;
	char *fresh;
	bool ok = false;
	bool keep_as_is = false;
	bool use_root = false;

	parts = lxc_normalize_path(*path);
	if (parts == NULL)
		return false;

	/* No components left after normalizing. */
	if (parts[0] == NULL) {
		size_t len = strlen(*path);

		if (len == 0)
			keep_as_is = true;
		/* NOTE(review): this test only holds when *path contains no
		 * '/' at all (e.g. "."), not for "///" - confirm intent.
		 */
		else if (strcspn(*path, "/") == len)
			use_root = true;
	}

	if (keep_as_is) {
		ok = true;
	} else {
		if (use_root)
			fresh = strdup("/");
		else
			fresh = lxc_string_join("/", (const char **)parts, **path == '/');

		if (fresh != NULL) {
			free(*path);
			*path = fresh;
			ok = true;
		}
	}

	lxc_free_array((void **)parts, free);
	return ok;
}
768
/*
 * Return a newly allocated string made of @first followed by @second,
 * inserting a '/' between them unless @second already starts with one.
 * Returns NULL when out of memory.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	int need_slash = (second[0] != '/');
	size_t size = strlen(first) + strlen(second) + 1;
	char *joined;

	if (need_slash)
		size++;

	joined = calloc(1, size);
	if (joined == NULL)
		return NULL;

	if (need_slash)
		snprintf(joined, size, "%s/%s", first, second);
	else
		snprintf(joined, size, "%s%s", first, second);

	return joined;
}
787
/*
 * Check whether @needle is one of the tokens of @haystack, a list whose
 * items are separated by the character @_sep. Empty items never match
 * because the tokenizer skips them. A NULL @needle or @haystack yields
 * false.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	char delim[2] = { _sep, '\0' };
	char *copy, *tok, *state = NULL;
	size_t size;

	if (!needle || !haystack)
		return 0;

	/* strtok_r() modifies its input, so work on a stack copy */
	size = strlen(haystack) + 1;
	copy = alloca(size);
	memcpy(copy, haystack, size);

	tok = strtok_r(copy, delim, &state);
	while (tok != NULL) {
		if (strcmp(needle, tok) == 0)
			return 1;
		tok = strtok_r(NULL, delim, &state);
	}

	return 0;
}
805
/*
 * Split @string at every occurrence of @_sep into a newly allocated,
 * NULL-terminated array of newly allocated strings. Empty items are
 * skipped by the tokenizer. A NULL @string yields an array that only
 * holds the terminator.
 *
 * Returns NULL on allocation failure, leaving errno as set by the call
 * that failed.
 */
char **lxc_string_split(const char *string, char _sep)
{
	char delim[2] = {_sep, '\0'};
	char **list = NULL, **shrunk = NULL;
	char *copy, *tok, *state = NULL;
	size_t capacity = 0;
	size_t used = 0;
	int err;

	if (string == NULL)
		return calloc(1, sizeof(char *));

	copy = alloca(strlen(string) + 1);
	strcpy(copy, string);

	tok = strtok_r(copy, delim, &state);
	while (tok != NULL) {
		if (lxc_grow_array((void ***)&list, &capacity, used + 1, 16) < 0)
			goto fail;

		list[used] = strdup(tok);
		if (list[used] == NULL)
			goto fail;
		used++;

		tok = strtok_r(NULL, delim, &state);
	}

	/* give back whatever was over-allocated */
	shrunk = realloc(list, (used + 1) * sizeof(char *));
	if (shrunk == NULL)
		goto fail;
	list = shrunk;

	/* Without any token the array was never initialized. */
	if (used == 0)
		list[0] = NULL;

	return list;

fail:
	err = errno;
	lxc_free_array((void **)list, free);
	errno = err;
	return NULL;
}
845
/*
 * Like lxc_string_split(), but additionally strips leading and trailing
 * spaces and tabs from every item.
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings; for an input without any token the array only holds the
 * terminator. Returns NULL on allocation failure, leaving errno as set by
 * the call that failed.
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char *token, *str, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **tmp, **result = NULL;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;
	size_t i = 0;

	if (!string)
		return calloc(1, sizeof(char *));

	str = alloca(strlen(string) + 1);
	strcpy(str, string);
	for (; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		/* strip leading blanks ... */
		while (token[0] == ' ' || token[0] == '\t')
			token++;
		/* ... and trailing ones */
		i = strlen(token);
		while (i > 0 && (token[i - 1] == ' ' || token[i - 1] == '\t')) {
			token[i - 1] = '\0';
			i--;
		}
		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}

	/* if we allocated too much, reduce it; keep the old array around so
	 * it can still be released when realloc() fails
	 */
	tmp = realloc(result, (result_count + 1) * sizeof(char *));
	if (!tmp)
		goto error_out;
	result = tmp;

	/* Without any token the memory comes straight from realloc(NULL, ...)
	 * and is uninitialized, so always write the terminator ourselves.
	 */
	result[result_count] = NULL;
	return result;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
886
887 void lxc_free_array(void **array, lxc_free_fn element_free_fn)
888 {
889 void **p;
890 for (p = array; p && *p; p++)
891 element_free_fn(*p);
892 free((void*)array);
893 }
894
/*
 * Make sure *@array has room for @new_size elements plus a terminating
 * NULL. The capacity only ever grows, in multiples of
 * @capacity_increment; newly added slots are zeroed.
 *
 * *@array and *@capacity are updated on reallocation. If only one of the
 * two is initialized both are reset first.
 *
 * Returns 0 on success. Returns -1 if the reallocation fails, or - with
 * errno set to EINVAL - if growth is required but @capacity_increment is
 * 0 (this used to loop forever).
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	/* array has sufficient elements */
	if (new_size + 1 <= *capacity)
		return 0;

	if (capacity_increment == 0) {
		errno = EINVAL;
		return -1;
	}

	/* smallest *capacity + k * capacity_increment that fits new_size + 1 */
	new_capacity = *capacity;
	new_capacity += ((new_size + 1 - *capacity + capacity_increment - 1) /
			 capacity_increment) * capacity_increment;

	new_array = realloc(*array, new_capacity * sizeof(void *));
	if (!new_array)
		return -1;

	memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
	*array = new_array;
	*capacity = new_capacity;

	return 0;
}
923
/*
 * Count the elements of the NULL-terminated @array, not including the
 * terminator. A NULL @array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t n = 0;

	if (array == NULL)
		return 0;

	while (array[n] != NULL)
		n++;

	return n;
}
934
/*
 * Create or truncate @filename and write @count bytes from @buf to it,
 * followed by a newline if @add_newline is set.
 *
 * Returns 0 on success, -1 if the file could not be opened or a write was
 * short or failed; errno of the failing step is preserved across the
 * close().
 */
int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
{
	int fd, err;
	ssize_t written;

	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
	if (fd < 0)
		return -1;

	written = lxc_write_nointr(fd, buf, count);
	if (written < 0 || (size_t)written != count)
		goto fail;

	if (add_newline && lxc_write_nointr(fd, "\n", 1) != 1)
		goto fail;

	close(fd);
	return 0;

fail:
	err = errno;
	close(fd);
	errno = err;
	return -1;
}
962
/*
 * Read from @filename.
 *
 * With a buffer (@buf non-NULL and @count non-zero) the buffer is zeroed
 * and filled by a single read of at most @count bytes. Without one the
 * file is read to its end and only the number of bytes is reported.
 *
 * Reads are restarted when interrupted by a signal, matching what
 * lxc_write_to_file() does for writes.
 *
 * Returns the number of bytes read, or -1 if the file could not be opened
 * or read; errno of the failing call is preserved.
 */
int lxc_read_from_file(const char *filename, void* buf, size_t count)
{
	int fd = -1, saved_errno;
	ssize_t ret;

	fd = open(filename, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -1;

	if (!buf || !count) {
		char scratch[100];
		size_t total = 0;

		while ((ret = lxc_read_nointr(fd, scratch, sizeof(scratch))) > 0)
			total += ret;
		if (ret >= 0)
			ret = total;
	} else {
		memset(buf, 0, count);
		ret = lxc_read_nointr(fd, buf, count);
	}

	/* grab errno before logging or close() get a chance to change it */
	saved_errno = errno;
	if (ret < 0)
		ERROR("read %s: %s", filename, strerror(saved_errno));

	close(fd);
	errno = saved_errno;
	return ret;
}
992
/*
 * Grow @array, which holds @count elements, by one slot and store NULL in
 * it. With @count 0 the array is returned unchanged.
 *
 * Returns the (possibly moved) array. If the reallocation fails all
 * @count elements and the array itself are freed and NULL is returned.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **grown;
	size_t idx;

	if (count == 0)
		return array;

	grown = realloc(array, (count + 1) * sizeof(*array));
	if (grown != NULL) {
		grown[count] = NULL;
		return grown;
	}

	for (idx = 0; idx < count; idx++)
		free(array[idx]);
	free(array);

	return NULL;
}
1012
/*
 * Produce a seed for rand(): read from /dev/urandom if possible, with
 * the current time plus the pid as the fallback value. When @srand_it is
 * set the seed is also passed to srand().
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom = fopen("/dev/urandom", "r");

	if (urandom != NULL) {
		int nread = fread(&seed, sizeof(seed), 1, urandom);

		if (nread != 1)
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
1034
/*
 * Translate the uid @orig through /proc/self/uid_map: find the line whose
 * second and third column (start and length) cover @orig and return the
 * first column shifted by @orig's offset into that range.
 *
 * Returns 0 if the map cannot be opened or no line covers @orig.
 */
uid_t get_ns_uid(uid_t orig)
{
	char *line = NULL;
	size_t cap = 0;
	uid_t ns_first, host_first, extent;
	uid_t mapped = 0;
	FILE *map;

	map = fopen("/proc/self/uid_map", "r");
	if (map == NULL)
		return 0;

	while (getline(&line, &cap, map) != -1) {
		if (sscanf(line, "%u %u %u", &ns_first, &host_first, &extent) != 3)
			continue;

		/* skip ranges that do not contain orig */
		if (orig < host_first || host_first + extent <= orig)
			continue;

		mapped = ns_first + (orig - host_first);
		break;
	}

	fclose(map);
	free(line);
	return mapped;
}
1059
/*
 * Report whether @path names a directory (symlinks are followed).
 * Any stat() failure, whatever its reason, counts as "no".
 */
bool dir_exists(const char *path)
{
	struct stat st;

	if (stat(path, &st) >= 0)
		return S_ISDIR(st.st_mode);

	return false;
}
1071
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 *
 * Feeds the @len bytes at @buf into the running 64 bit hash @hval and
 * returns the updated value: every byte is xor'ed into the hash, which is
 * then multiplied by the 64 bit FNV prime (mod 2^64).
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* 0x100000001b3 == 1 + 2^1 + 2^4 + 2^5 + 2^7 + 2^8 + 2^40 */
		hval *= 0x100000001b3ULL;
	}

	return hval;
}
1095
1096 /*
1097 * Detect whether / is mounted MS_SHARED. The only way I know of to
1098 * check that is through /proc/self/mountinfo.
1099 * I'm only checking for /. If the container rootfs or mount location
1100 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1101 * out would be too much work to be worth it.
1102 */
1103 int detect_shared_rootfs(void)
1104 {
1105 char buf[LXC_LINELEN], *p;
1106 FILE *f;
1107 int i;
1108 char *p2;
1109
1110 f = fopen("/proc/self/mountinfo", "r");
1111 if (!f)
1112 return 0;
1113 while (fgets(buf, LXC_LINELEN, f)) {
1114 for (p = buf, i = 0; p && i < 4; i++)
1115 p = strchr(p + 1, ' ');
1116 if (!p)
1117 continue;
1118 p2 = strchr(p + 1, ' ');
1119 if (!p2)
1120 continue;
1121 *p2 = '\0';
1122 if (strcmp(p + 1, "/") == 0) {
1123 // this is '/'. is it shared?
1124 p = strchr(p2 + 1, ' ');
1125 if (p && strstr(p, "shared:")) {
1126 fclose(f);
1127 return 1;
1128 }
1129 }
1130 }
1131 fclose(f);
1132 return 0;
1133 }
1134
/*
 * Move the calling process into the namespace @ns (the name of an entry
 * below /proc/<pid>/ns/) of process @pid.
 *
 * Returns true on success, false if the path does not fit, the namespace
 * file cannot be opened or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns) {
	char nspath[MAXPATHLEN];
	int nsfd, rc;

	/* Switch to new ns */
	rc = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (rc < 0 || rc >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	rc = setns(nsfd, 0);
	if (rc)
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, nsfd);

	close(nsfd);
	return rc == 0;
}
1159
/*
 * looking at fs/proc_namespace.c, it appears we can
 * actually expect the rootfs entry to very specifically contain
 * " - rootfs rootfs "
 * IIUC, so long as we've chrooted so that rootfs is not our root,
 * the rootfs entry should always be skipped in mountinfo contents.
 *
 * Returns true if /proc/self/mountinfo has such an entry mounted at "/",
 * false otherwise (including when mountinfo cannot be opened).
 */
bool detect_ramfs_rootfs(void)
{
	char *line = NULL;
	size_t cap = 0;
	bool is_ramfs = false;
	FILE *mounts;

	mounts = fopen("/proc/self/mountinfo", "r");
	if (mounts == NULL)
		return false;

	while (!is_ramfs && getline(&line, &cap, mounts) != -1) {
		char *field, *field_end, *dash;
		int skipped;

		/* the mount point is the fifth space separated field */
		field = line;
		for (skipped = 0; field && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (field == NULL)
			continue;

		field_end = strchr(field + 1, ' ');
		if (field_end == NULL)
			continue;
		*field_end = '\0';

		if (strcmp(field + 1, "/") != 0)
			continue;

		/* this is '/'. is it the ramfs? */
		dash = strchr(field_end + 1, '-');
		if (dash != NULL && strncmp(dash, "- rootfs rootfs ", 16) == 0)
			is_ramfs = true;
	}

	free(line);
	fclose(mounts);
	return is_ramfs;
}
1202
/*
 * Look for an executable called @cmd in the directories listed in $PATH,
 * each optionally prefixed with @rootfs. Candidates that do not fit into
 * MAXPATHLEN are skipped.
 *
 * Returns a newly allocated copy of the first path that access() reports
 * as executable, or NULL if there is none, $PATH is unset or memory runs
 * out.
 */
char *on_path(const char *cmd, const char *rootfs) {
	char candidate[MAXPATHLEN];
	char *env, *dirs, *dir;
	char *state = NULL;
	char *found = NULL;
	int n;

	env = getenv("PATH");
	if (env == NULL)
		return NULL;

	/* strtok_r() writes into its input, so tokenize a private copy */
	dirs = strdup(env);
	if (dirs == NULL)
		return NULL;

	for (dir = strtok_r(dirs, ":", &state); dir != NULL;
	     dir = strtok_r(NULL, ":", &state)) {
		if (rootfs)
			n = snprintf(candidate, MAXPATHLEN, "%s/%s/%s", rootfs, dir, cmd);
		else
			n = snprintf(candidate, MAXPATHLEN, "%s/%s", dir, cmd);

		if (n < 0 || n >= MAXPATHLEN)
			continue;

		if (access(candidate, X_OK) == 0) {
			found = strdup(candidate);
			break;
		}
	}

	free(dirs);
	return found;
}
1240
/* Report whether stat() succeeds on @f, i.e. whether something exists there. */
bool file_exists(const char *f)
{
	struct stat sb;
	int rc = stat(f, &sb);

	return rc == 0;
}
1247
/* Cgroup namespaces count as supported when /proc/self/ns/cgroup exists. */
bool cgns_supported(void)
{
	static const char cgroup_ns_path[] = "/proc/self/ns/cgroup";

	return file_exists(cgroup_ns_path);
}
1252
1253 /* historically lxc-init has been under /usr/lib/lxc and under
1254 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1255 */
1256 char *choose_init(const char *rootfs)
1257 {
1258 char *retv = NULL;
1259 const char *empty = "",
1260 *tmp;
1261 int ret, env_set = 0;
1262 struct stat mystat;
1263
1264 if (!getenv("PATH")) {
1265 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1266 SYSERROR("Failed to setenv");
1267 env_set = 1;
1268 }
1269
1270 retv = on_path("init.lxc", rootfs);
1271
1272 if (env_set) {
1273 if (unsetenv("PATH"))
1274 SYSERROR("Failed to unsetenv");
1275 }
1276
1277 if (retv)
1278 return retv;
1279
1280 retv = malloc(PATH_MAX);
1281 if (!retv)
1282 return NULL;
1283
1284 if (rootfs)
1285 tmp = rootfs;
1286 else
1287 tmp = empty;
1288
1289 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
1290 if (ret < 0 || ret >= PATH_MAX) {
1291 ERROR("pathname too long");
1292 goto out1;
1293 }
1294
1295 ret = stat(retv, &mystat);
1296 if (ret == 0)
1297 return retv;
1298
1299 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
1300 if (ret < 0 || ret >= PATH_MAX) {
1301 ERROR("pathname too long");
1302 goto out1;
1303 }
1304
1305 ret = stat(retv, &mystat);
1306 if (ret == 0)
1307 return retv;
1308
1309 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
1310 if (ret < 0 || ret >= PATH_MAX) {
1311 ERROR("pathname too long");
1312 goto out1;
1313 }
1314 ret = stat(retv, &mystat);
1315 if (ret == 0)
1316 return retv;
1317
1318 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
1319 if (ret < 0 || ret >= PATH_MAX) {
1320 ERROR("pathname too long");
1321 goto out1;
1322 }
1323 ret = stat(retv, &mystat);
1324 if (ret == 0)
1325 return retv;
1326
1327 /*
1328 * Last resort, look for the statically compiled init.lxc which we
1329 * hopefully bind-mounted in.
1330 * If we are called during container setup, and we get to this point,
1331 * then the init.lxc.static from the host will need to be bind-mounted
1332 * in. So we return NULL here to indicate that.
1333 */
1334 if (rootfs)
1335 goto out1;
1336
1337 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
1338 if (ret < 0 || ret >= PATH_MAX) {
1339 WARN("Nonsense - name /lxc.init.static too long");
1340 goto out1;
1341 }
1342 ret = stat(retv, &mystat);
1343 if (ret == 0)
1344 return retv;
1345
1346 out1:
1347 free(retv);
1348 return NULL;
1349 }
1350
/*
 * Create or truncate @file and write the string @content to it.
 *
 * Returns 0 once everything has been written and the stream was closed
 * without error, -1 otherwise. Output is buffered, so a write error may
 * only show up when fclose() flushes - its result is therefore part of
 * the verdict.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int ret = 0;
	size_t len = strlen(content);

	f = fopen(file, "w");
	if (!f)
		return -1;

	if (len > 0 && fwrite(content, 1, len, f) != len)
		ret = -1;

	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
1364
/*
 * Returns 1 if @path can be stat()'ed and is a directory, 0 in every
 * other case.
 */
int is_dir(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) ? 1 : 0;
}
1373
1374 /*
1375 * Given the '-t' template option to lxc-create, figure out what to
1376 * do. If the template is a full executable path, use that. If it
1377 * is something like 'sshd', then return $templatepath/lxc-sshd.
1378 * On success return the template, on error return NULL.
1379 */
1380 char *get_template_path(const char *t)
1381 {
1382 int ret, len;
1383 char *tpath;
1384
1385 if (t[0] == '/' && access(t, X_OK) == 0) {
1386 tpath = strdup(t);
1387 return tpath;
1388 }
1389
1390 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
1391 tpath = malloc(len);
1392 if (!tpath)
1393 return NULL;
1394 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
1395 if (ret < 0 || ret >= len) {
1396 free(tpath);
1397 return NULL;
1398 }
1399 if (access(tpath, X_OK) < 0) {
1400 SYSERROR("bad template: %s", t);
1401 free(tpath);
1402 return NULL;
1403 }
1404
1405 return tpath;
1406 }
1407
1408 /*
1409 * Sets the process title to the specified title. Note that this may fail if
1410 * the kernel doesn't support PR_SET_MM_MAP (kernels <3.18).
1411 */
1412 int setproctitle(char *title)
1413 {
1414 static char *proctitle = NULL;
1415 char buf[2048], *tmp;
1416 FILE *f;
1417 int i, len, ret = 0;
1418
1419 /* We don't really need to know all of this stuff, but unfortunately
1420 * PR_SET_MM_MAP requires us to set it all at once, so we have to
1421 * figure it out anyway.
1422 */
1423 unsigned long start_data, end_data, start_brk, start_code, end_code,
1424 start_stack, arg_start, arg_end, env_start, env_end,
1425 brk_val;
1426 struct prctl_mm_map prctl_map;
1427
1428 f = fopen_cloexec("/proc/self/stat", "r");
1429 if (!f) {
1430 return -1;
1431 }
1432
1433 tmp = fgets(buf, sizeof(buf), f);
1434 fclose(f);
1435 if (!tmp) {
1436 return -1;
1437 }
1438
1439 /* Skip the first 25 fields, column 26-28 are start_code, end_code,
1440 * and start_stack */
1441 tmp = strchr(buf, ' ');
1442 for (i = 0; i < 24; i++) {
1443 if (!tmp)
1444 return -1;
1445 tmp = strchr(tmp+1, ' ');
1446 }
1447 if (!tmp)
1448 return -1;
1449
1450 i = sscanf(tmp, "%lu %lu %lu", &start_code, &end_code, &start_stack);
1451 if (i != 3)
1452 return -1;
1453
1454 /* Skip the next 19 fields, column 45-51 are start_data to arg_end */
1455 for (i = 0; i < 19; i++) {
1456 if (!tmp)
1457 return -1;
1458 tmp = strchr(tmp+1, ' ');
1459 }
1460
1461 if (!tmp)
1462 return -1;
1463
1464 i = sscanf(tmp, "%lu %lu %lu %*u %*u %lu %lu",
1465 &start_data,
1466 &end_data,
1467 &start_brk,
1468 &env_start,
1469 &env_end);
1470 if (i != 5)
1471 return -1;
1472
1473 /* Include the null byte here, because in the calculations below we
1474 * want to have room for it. */
1475 len = strlen(title) + 1;
1476
1477 proctitle = realloc(proctitle, len);
1478 if (!proctitle)
1479 return -1;
1480
1481 arg_start = (unsigned long) proctitle;
1482 arg_end = arg_start + len;
1483
1484 brk_val = syscall(__NR_brk, 0);
1485
1486 prctl_map = (struct prctl_mm_map) {
1487 .start_code = start_code,
1488 .end_code = end_code,
1489 .start_stack = start_stack,
1490 .start_data = start_data,
1491 .end_data = end_data,
1492 .start_brk = start_brk,
1493 .brk = brk_val,
1494 .arg_start = arg_start,
1495 .arg_end = arg_end,
1496 .env_start = env_start,
1497 .env_end = env_end,
1498 .auxv = NULL,
1499 .auxv_size = 0,
1500 .exe_fd = -1,
1501 };
1502
1503 ret = prctl(PR_SET_MM, PR_SET_MM_MAP, (long) &prctl_map, sizeof(prctl_map), 0);
1504 if (ret == 0)
1505 strcpy((char*)arg_start, title);
1506 else
1507 INFO("setting cmdline failed - %s", strerror(errno));
1508
1509 return ret;
1510 }
1511
/*
 * @path:    a pathname where / replaced with '\0'.
 * @offsetp: pointer to int showing which path segment was last seen.
 *           Updated on return to reflect the next segment.
 * @fulllen: full original path length.
 * Returns a pointer to the next path segment, or NULL if done.
 *
 * When *offsetp is already >= @fulllen, NULL is returned and *offsetp is left
 * untouched.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int offset = *offsetp;

	if (offset >= fulllen)
		return NULL;

	/* The bound is tested before the byte is read so that nothing beyond
	 * path[fulllen - 1] is ever accessed.
	 */

	/* Move to the end of the segment @offset currently points into ... */
	while (offset < fulllen && path[offset] != '\0')
		offset++;

	/* ... and then across the separator(s) that follow it. */
	while (offset < fulllen && path[offset] == '\0')
		offset++;

	*offsetp = offset;
	return (offset < fulllen) ? &path[offset] : NULL;
}
1534
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * The check is purely textual: @subdir must start with @dir and the match
 * must end on a path component boundary. @subdir equal to @dir counts as a
 * match. An empty @dir (@len == 0) matches every @subdir.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	/* An empty prefix places no constraint on @subdir. Returning here
	 * also keeps dir[len - 1] below from indexing before the string.
	 */
	if (len == 0)
		return true;

	if (subdirlen < len)
		return false;

	if (strncmp(subdir, dir, len) != 0)
		return false;

	/* @dir already ends in a separator, so the prefix match suffices. */
	if (dir[len - 1] == '/')
		return true;

	/* Otherwise the match has to end exactly at a component boundary so
	 * that "/ab" is not taken for a subdir of "/a".
	 */
	if (subdir[len] == '/' || subdirlen == len)
		return true;

	return false;
}
1553
/*
 * Inspect the file behind the open descriptor @fd.
 *
 * Returns -ENOENT when fstat() fails, -ELOOP when the descriptor refers to a
 * symbolic link and 0 otherwise.
 */
static int check_symlink(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) < 0)
		return -ENOENT;

	return S_ISLNK(sb.st_mode) ? -ELOOP : 0;
}
1568
1569 /*
1570 * Open a file or directory, provided that it contains no symlinks.
1571 *
1572 * CAVEAT: This function must not be used for other purposes than container
1573 * setup before executing the container's init
1574 */
1575 static int open_if_safe(int dirfd, const char *nextpath)
1576 {
1577 int newfd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
1578 if (newfd >= 0) // was not a symlink, all good
1579 return newfd;
1580
1581 if (errno == ELOOP)
1582 return newfd;
1583
1584 if (errno == EPERM || errno == EACCES) {
1585 /* we're not root (cause we got EPERM) so
1586 try opening with O_PATH */
1587 newfd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
1588 if (newfd >= 0) {
1589 /* O_PATH will return an fd for symlinks. We know
1590 * nextpath wasn't a symlink at last openat, so if fd
1591 * is now a link, then something * fishy is going on
1592 */
1593 int ret = check_symlink(newfd);
1594 if (ret < 0) {
1595 close(newfd);
1596 newfd = ret;
1597 }
1598 }
1599 }
1600
1601 return newfd;
1602 }
1603
/*
 * Open a path intended as a mount target, walking it one component at a
 * time so that no component below @prefix_skip may be a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target:      path to be opened
 * @prefix_skip: a part of @target in which to ignore symbolic links. This
 *               would be the container's rootfs. When NULL or empty, the
 *               walk starts at "/".
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	char *copy, *p;
	int fd, target_len;
	int pos = 0;

	target_len = strlen(target);

	if (!prefix_skip || strlen(prefix_skip) == 0) {
		prefix_skip = "/";
		pos = 0;
	} else {
		/* make sure prefix-skip makes sense */
		pos = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, pos)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
			      target, prefix_skip);
			return -EINVAL;
		}

		/* get_nextpath() expects its offset to sit on a separator
		 * (turned into \0) or before it, so step back by one.
		 */
		if (pos)
			pos--;
	}

	/* Work on a private copy in which every '/' becomes a terminator. */
	copy = strdup(target);
	if (!copy) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}

	for (p = copy; p < copy + target_len; p++)
		if (*p == '/')
			*p = '\0';

	fd = open(prefix_skip, O_RDONLY);
	while (fd >= 0) {
		char *segment;
		int nextfd, saved_errno;

		segment = get_nextpath(copy, &pos, target_len);
		if (!segment)
			break;

		/* Descend one level; the parent fd is no longer needed. */
		nextfd = open_if_safe(fd, segment);
		saved_errno = errno;
		close(fd);
		fd = nextfd;

		if (fd < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", segment, target);
		}
	}

	free(copy);
	return fd;
}
1679
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * The target (and, for relative bind mounts, the source) is opened without
 * following symlinks and the mount is then performed through the matching
 * /proc/self/fd/<fd> entries.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int srcfd = -1, destfd, ret, saved_errno;
	char srcbuf[50], destbuf[50]; /* only needs enough for /proc/self/fd/<fd> */
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("this is a relative bind mount");
		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		/* snprintf() returning the buffer size already means the
		 * output was truncated.
		 */
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Failed to format /proc/self/fd path for mount source");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}
		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);
		close(destfd);
		ERROR("Failed to format /proc/self/fd path for mount target");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);
	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		/* @src may legitimately be NULL; do not hand that to %s. */
		SYSERROR("Failed to mount %s onto %s", src ? src : "(null)", dest);
		return ret;
	}

	return 0;
}
1745
1746 /*
1747 * Mount a proc under @rootfs if proc self points to a pid other than
1748 * my own. This is needed to have a known-good proc mount for setting
1749 * up LSMs both at container startup and attach.
1750 *
1751 * @rootfs : the rootfs where proc should be mounted
1752 *
1753 * Returns < 0 on failure, 0 if the correct proc was already mounted
1754 * and 1 if a new proc was mounted.
1755 *
1756 * NOTE: not to be called from inside the container namespace!
1757 */
1758 int lxc_mount_proc_if_needed(const char *rootfs)
1759 {
1760 char path[MAXPATHLEN];
1761 int link_to_pid, linklen, mypid, ret;
1762 char link[LXC_NUMSTRLEN64] = {0};
1763
1764 ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
1765 if (ret < 0 || ret >= MAXPATHLEN) {
1766 SYSERROR("proc path name too long");
1767 return -1;
1768 }
1769
1770 linklen = readlink(path, link, LXC_NUMSTRLEN64);
1771
1772 ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
1773 if (ret < 0 || ret >= MAXPATHLEN) {
1774 SYSERROR("proc path name too long");
1775 return -1;
1776 }
1777
1778 /* /proc not mounted */
1779 if (linklen < 0) {
1780 if (mkdir(path, 0755) && errno != EEXIST)
1781 return -1;
1782 goto domount;
1783 } else if (linklen >= LXC_NUMSTRLEN64) {
1784 link[linklen - 1] = '\0';
1785 ERROR("readlink returned truncated content: \"%s\"", link);
1786 return -1;
1787 }
1788
1789 mypid = getpid();
1790 INFO("I am %d, /proc/self points to \"%s\"", mypid, link);
1791
1792 if (lxc_safe_int(link, &link_to_pid) < 0)
1793 return -1;
1794
1795 /* correct procfs is already mounted */
1796 if (link_to_pid == mypid)
1797 return 0;
1798
1799 ret = umount2(path, MNT_DETACH);
1800 if (ret < 0)
1801 WARN("failed to umount \"%s\" with MNT_DETACH", path);
1802
1803 domount:
1804 /* rootfs is NULL */
1805 if (!strcmp(rootfs, ""))
1806 ret = mount("proc", path, "proc", 0, NULL);
1807 else
1808 ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);
1809 if (ret < 0)
1810 return -1;
1811
1812 INFO("mounted /proc in container for security transition");
1813 return 1;
1814 }
1815
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the file descriptor, or the negative result of open() after
 * logging the failure.
 */
int open_devnull(void)
{
	int fd;

	fd = open("/dev/null", O_RDWR);
	if (fd >= 0)
		return fd;

	SYSERROR("Can't open /dev/null");
	return fd;
}
1825
/*
 * Duplicate @fd onto file descriptors 0, 1 and 2, in that order.
 *
 * Returns 0 on success, -1 if @fd is negative or any dup2() fails. @fd
 * itself is left open.
 */
int set_stdfds(int fd)
{
	int target;

	if (fd < 0)
		return -1;

	for (target = 0; target <= 2; target++) {
		if (dup2(fd, target) < 0)
			return -1;
	}

	return 0;
}
1840
/*
 * Point file descriptors 0, 1 and 2 at /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null could not be
 * opened.
 */
int null_stdfds(void)
{
	int fd, ret;

	fd = open_devnull();
	if (fd < 0)
		return -1;

	ret = set_stdfds(fd);
	close(fd);

	return ret;
}
1853
/*
 * Count the lines of file @fn by reading it with getline() until it fails.
 *
 * Returns the number of lines read, or -1 if the file could not be opened.
 */
int lxc_count_file_lines(const char *fn)
{
	FILE *f;
	char *line = NULL;
	size_t sz = 0;
	int n;

	f = fopen_cloexec(fn, "r");
	if (!f)
		return -1;

	for (n = 0; getline(&line, &sz, f) != -1; n++)
		;

	free(line);
	fclose(f);

	return n;
}
1875
1876 void *lxc_strmmap(void *addr, size_t length, int prot, int flags, int fd,
1877 off_t offset)
1878 {
1879 void *tmp = NULL, *overlap = NULL;
1880
1881 /* We establish an anonymous mapping that is one byte larger than the
1882 * underlying file. The pages handed to us are zero filled. */
1883 tmp = mmap(addr, length + 1, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1884 if (tmp == MAP_FAILED)
1885 return tmp;
1886
1887 /* Now we establish a fixed-address mapping starting at the address we
1888 * received from our anonymous mapping and replace all bytes excluding
1889 * the additional \0-byte with the file. This allows us to use normal
1890 * string-handling functions. */
1891 overlap = mmap(tmp, length, prot, MAP_FIXED | flags, fd, offset);
1892 if (overlap == MAP_FAILED)
1893 munmap(tmp, length + 1);
1894
1895 return overlap;
1896 }
1897
/*
 * Unmap @length + 1 bytes starting at @addr; the extra byte matches the
 * terminator lxc_strmmap() adds to a mapping of @length bytes.
 *
 * Returns the result of munmap().
 */
int lxc_strmunmap(void *addr, size_t length)
{
	size_t mapped_len = length + 1;

	return munmap(addr, mapped_len);
}
1902
1903 /* Check whether a signal is blocked by a process. */
1904 /* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
1905 #define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
1906 bool task_blocking_signal(pid_t pid, int signal)
1907 {
1908 bool bret = false;
1909 char *line = NULL;
1910 long unsigned int sigblk = 0;
1911 size_t n = 0;
1912 int ret;
1913 FILE *f;
1914
1915 char status[__PROC_STATUS_LEN];
1916
1917 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1918 if (ret < 0 || ret >= __PROC_STATUS_LEN)
1919 return bret;
1920
1921 f = fopen(status, "r");
1922 if (!f)
1923 return bret;
1924
1925 while (getline(&line, &n, f) != -1) {
1926 if (!strncmp(line, "SigBlk:\t", 8))
1927 if (sscanf(line + 8, "%lx", &sigblk) != 1)
1928 goto out;
1929 }
1930
1931 if (sigblk & signal)
1932 bret = true;
1933
1934 out:
1935 free(line);
1936 fclose(f);
1937 return bret;
1938 }
1939
/*
 * Grow the NULL-terminated pointer array *@list by one slot.
 *
 * *@list may be NULL, in which case a new array is allocated. On return the
 * new slot and the slot after it are both NULL, so the array stays properly
 * terminated even before the caller stores something in the new slot.
 *
 * Returns the index of the new slot, or -1 if the allocation failed (in
 * which case *@list is unchanged).
 */
static int lxc_append_null_to_list(void ***list)
{
	int newentry = 0;
	void **tmp;

	if (*list)
		while ((*list)[newentry])
			newentry++;

	/* one slot for the new entry and one for the terminator */
	tmp = realloc(*list, (newentry + 2) * sizeof(*tmp));
	if (!tmp)
		return -1;

	*list = tmp;
	/* A freshly allocated array contains indeterminate values; clear the
	 * new slot as well so the list is never left unterminated.
	 */
	(*list)[newentry] = NULL;
	(*list)[newentry + 1] = NULL;

	return newentry;
}
1959
/*
 * Append a copy of @entry to the NULL-terminated string array *@list.
 *
 * Returns 0 on success and -1 if memory could not be allocated. On failure
 * the entries already in the list are left as they were.
 */
int lxc_append_string(char ***list, char *entry)
{
	char *copy;
	int newentry;

	/* Duplicate first: if this fails the list has not been touched. */
	copy = strdup(entry);
	if (!copy)
		return -1;

	newentry = lxc_append_null_to_list((void ***)list);
	if (newentry < 0) {
		free(copy);
		return -1;
	}

	(*list)[newentry] = copy;

	return 0;
}
1977
1978 int lxc_preserve_ns(const int pid, const char *ns)
1979 {
1980 int ret;
1981 /* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
1982 #define __NS_PATH_LEN 50
1983 char path[__NS_PATH_LEN];
1984
1985 /* This way we can use this function to also check whether namespaces
1986 * are supported by the kernel by passing in the NULL or the empty
1987 * string.
1988 */
1989 ret = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid,
1990 !ns || strcmp(ns, "") == 0 ? "" : "/",
1991 !ns || strcmp(ns, "") == 0 ? "" : ns);
1992 if (ret < 0 || (size_t)ret >= __NS_PATH_LEN)
1993 return -1;
1994
1995 return open(path, O_RDONLY | O_CLOEXEC);
1996 }
1997
/*
 * Convert the string @numstr to an unsigned int.
 *
 * Leading whitespace is skipped; the base is detected by strtoul() (base 0).
 * The whole remainder of the string must be consumed by the conversion.
 *
 * Returns 0 and stores the value in *@converted on success, -EINVAL if the
 * number is negative, empty or followed by other characters, and -ERANGE if
 * it does not fit an unsigned int.
 */
int lxc_safe_uint(const char *numstr, unsigned int *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* <ctype.h> functions are only defined for unsigned char values and
	 * EOF, so do not pass a possibly negative plain char.
	 */
	while (isspace((unsigned char)*numstr))
		numstr++;

	/* strtoul() would silently negate the value; reject it instead. */
	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	if (uli > UINT_MAX)
		return -ERANGE;

	*converted = (unsigned int)uli;
	return 0;
}
2023
/*
 * Convert the string @numstr to an unsigned long.
 *
 * Leading whitespace is skipped; the base is detected by strtoul() (base 0).
 * The whole remainder of the string must be consumed by the conversion.
 *
 * Returns 0 and stores the value in *@converted on success, -EINVAL if the
 * number is negative, empty or followed by other characters, and -ERANGE if
 * it does not fit an unsigned long.
 */
int lxc_safe_ulong(const char *numstr, unsigned long *converted)
{
	char *err = NULL;
	unsigned long int uli;

	/* <ctype.h> functions are only defined for unsigned char values and
	 * EOF, so do not pass a possibly negative plain char.
	 */
	while (isspace((unsigned char)*numstr))
		numstr++;

	/* strtoul() would silently negate the value; reject it instead. */
	if (*numstr == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno == ERANGE && uli == ULONG_MAX)
		return -ERANGE;

	if (err == numstr || *err != '\0')
		return -EINVAL;

	*converted = uli;
	return 0;
}
2046
/*
 * Convert the string @numstr to an int using strtol() with base 0.
 *
 * Returns 0 and stores the value in *@converted on success, -EINVAL if no
 * number could be parsed or characters follow it, and -ERANGE if the value
 * does not fit an int.
 */
int lxc_safe_int(const char *numstr, int *converted)
{
	char *end = NULL;
	long val;

	errno = 0;
	val = strtol(numstr, &end, 0);

	/* out of range for long already */
	if (errno == ERANGE && (val == LONG_MAX || val == LONG_MIN))
		return -ERANGE;

	/* conversion error, nothing parsed, or trailing characters */
	if ((errno != 0 && val == 0) || end == numstr || *end != '\0')
		return -EINVAL;

	if (val < INT_MIN || val > INT_MAX)
		return -ERANGE;

	*converted = (int)val;
	return 0;
}
2069
/*
 * Convert the string @numstr to a long using strtol() with base 0.
 *
 * Returns 0 and stores the value in *@converted on success, -EINVAL if no
 * number could be parsed or characters follow it, and -ERANGE if the value
 * does not fit a long.
 */
int lxc_safe_long(const char *numstr, long int *converted)
{
	char *end = NULL;
	long val;

	errno = 0;
	val = strtol(numstr, &end, 0);

	/* overflow or underflow reported by strtol() */
	if (errno == ERANGE && (val == LONG_MAX || val == LONG_MIN))
		return -ERANGE;

	/* conversion error, nothing parsed, or trailing characters */
	if ((errno != 0 && val == 0) || end == numstr || *end != '\0')
		return -EINVAL;

	*converted = val;
	return 0;
}
2089
/*
 * Switch to group id @gid and then to user id @uid.
 *
 * The group is changed first; if that fails the user id is not touched.
 *
 * Returns 0 on success and the negated errno of the failing call otherwise.
 */
int lxc_switch_uid_gid(uid_t uid, gid_t gid)
{
	int saved_errno;

	if (setgid(gid) < 0) {
		/* Capture errno before logging so that the value returned is
		 * the one set by setgid() and can never turn into 0.
		 */
		saved_errno = errno;
		SYSERROR("Failed to switch to gid %d.", (int)gid);
		return -saved_errno;
	}
	NOTICE("Switched to gid %d.", (int)gid);

	if (setuid(uid) < 0) {
		saved_errno = errno;
		SYSERROR("Failed to switch to uid %d.", (int)uid);
		return -saved_errno;
	}
	NOTICE("Switched to uid %d.", (int)uid);

	return 0;
}
2106
/*
 * Simple convenience function which enables uniform logging.
 *
 * Calls setgroups(@size, @list). Returns 0 on success and the negated errno
 * of setgroups() on failure.
 */
int lxc_setgroups(int size, gid_t list[])
{
	if (setgroups(size, list) < 0) {
		/* Capture errno before logging so that the value returned is
		 * the one set by setgroups() and can never turn into 0.
		 */
		int saved_errno = errno;

		SYSERROR("Failed to setgroups().");
		return -saved_errno;
	}
	NOTICE("Dropped additional groups.");

	return 0;
}
2118
2119 static int lxc_get_unused_loop_dev_legacy(char *loop_name)
2120 {
2121 struct dirent *dp;
2122 struct loop_info64 lo64;
2123 DIR *dir;
2124 int dfd = -1, fd = -1, ret = -1;
2125
2126 dir = opendir("/dev");
2127 if (!dir)
2128 return -1;
2129
2130 while ((dp = readdir(dir))) {
2131 if (!dp)
2132 break;
2133
2134 if (strncmp(dp->d_name, "loop", 4) != 0)
2135 continue;
2136
2137 dfd = dirfd(dir);
2138 if (dfd < 0)
2139 continue;
2140
2141 fd = openat(dfd, dp->d_name, O_RDWR);
2142 if (fd < 0)
2143 continue;
2144
2145 ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
2146 if (ret < 0) {
2147 if (ioctl(fd, LOOP_GET_STATUS64, &lo64) == 0 ||
2148 errno != ENXIO) {
2149 close(fd);
2150 fd = -1;
2151 continue;
2152 }
2153 }
2154
2155 ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
2156 if (ret < 0 || ret >= LO_NAME_SIZE) {
2157 close(fd);
2158 fd = -1;
2159 continue;
2160 }
2161
2162 break;
2163 }
2164
2165 closedir(dir);
2166
2167 if (fd < 0)
2168 return -1;
2169
2170 return fd;
2171 }
2172
2173 static int lxc_get_unused_loop_dev(char *name_loop)
2174 {
2175 int loop_nr, ret;
2176 int fd_ctl = -1, fd_tmp = -1;
2177
2178 fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
2179 if (fd_ctl < 0)
2180 return -ENODEV;
2181
2182 loop_nr = ioctl(fd_ctl, LOOP_CTL_GET_FREE);
2183 if (loop_nr < 0)
2184 goto on_error;
2185
2186 ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", loop_nr);
2187 if (ret < 0 || ret >= LO_NAME_SIZE)
2188 goto on_error;
2189
2190 fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC);
2191 if (fd_tmp < 0)
2192 goto on_error;
2193
2194 on_error:
2195 close(fd_ctl);
2196 return fd_tmp;
2197 }
2198
2199 int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
2200 {
2201 int ret;
2202 struct loop_info64 lo64;
2203 int fd_img = -1, fret = -1, fd_loop = -1;
2204
2205 fd_loop = lxc_get_unused_loop_dev(loop_dev);
2206 if (fd_loop < 0) {
2207 if (fd_loop == -ENODEV)
2208 fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev);
2209 else
2210 goto on_error;
2211 }
2212
2213 fd_img = open(source, O_RDWR | O_CLOEXEC);
2214 if (fd_img < 0)
2215 goto on_error;
2216
2217 ret = ioctl(fd_loop, LOOP_SET_FD, fd_img);
2218 if (ret < 0)
2219 goto on_error;
2220
2221 memset(&lo64, 0, sizeof(lo64));
2222 lo64.lo_flags = flags;
2223
2224 ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64);
2225 if (ret < 0)
2226 goto on_error;
2227
2228 fret = 0;
2229
2230 on_error:
2231 if (fd_img >= 0)
2232 close(fd_img);
2233
2234 if (fret < 0 && fd_loop >= 0) {
2235 close(fd_loop);
2236 fd_loop = -1;
2237 }
2238
2239 return fd_loop;
2240 }
2241
/*
 * Unmount everything that is stacked on @path by calling umount2() until it
 * fails.
 *
 * @lazy: use MNT_DETACH for the unmounts
 *
 * Returns the number of successful unmounts once umount2() fails with
 * EINVAL, or the negated errno for any other failure.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	int umounts = 0;

	for (;;) {
		if (umount2(path, lazy ? MNT_DETACH : 0) < 0) {
			/* We consider anything else than EINVAL deadly to
			 * prevent going into an infinite loop. (The other
			 * alternative is constantly parsing
			 * /proc/self/mountinfo which is yucky and probably
			 * racy.)
			 */
			if (errno != EINVAL)
				return -errno;

			break;
		}

		/* Saturate the counter instead of overflowing it, then look
		 * for another mount stacked underneath.
		 */
		if (umounts != INT_MAX)
			umounts++;
	}

	return umounts;
}
2272
/*
 * Run @child_fn(@args) in a forked child and capture its output.
 *
 * stdout and stderr of the child are redirected into a pipe. The parent
 * performs a single read of up to @buf_size - 1 bytes from that pipe into
 * @buf and replaces the last byte read with a terminator. @buf may be NULL
 * or @buf_size 0 if the output is not wanted.
 *
 * Returns -1 if the pipe or the child could not be created, otherwise the
 * result of wait_for_pid() for the child.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	pid_t child;
	int ret, fret, pipefd[2];
	ssize_t bytes;

	/* Make sure our callers do not receive uninitialized memory. */
	if (buf_size > 0 && buf)
		buf[0] = '\0';

	if (pipe(pipefd) < 0) {
		SYSERROR("failed to create pipe");
		return -1;
	}

	child = fork();
	if (child < 0) {
		close(pipefd[0]);
		close(pipefd[1]);
		SYSERROR("failed to create new process");
		return -1;
	}

	if (child == 0) {
		/* Close the read-end of the pipe. */
		close(pipefd[0]);

		/* Redirect std{err,out} to write-end of the
		 * pipe.
		 */
		ret = dup2(pipefd[1], STDOUT_FILENO);
		if (ret >= 0)
			ret = dup2(pipefd[1], STDERR_FILENO);

		/* Close the write-end of the pipe. */
		close(pipefd[1]);

		if (ret < 0) {
			SYSERROR("failed to duplicate std{err,out} file descriptor");
			exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("failed to exec command");
		exit(EXIT_FAILURE);
	}

	/* close the write-end of the pipe */
	close(pipefd[1]);

	/* Only read when there is somewhere to put the data, and do not lose
	 * the output just because a signal interrupted the read.
	 */
	if (buf && buf_size > 0) {
		do {
			bytes = read(pipefd[0], buf, buf_size - 1);
		} while (bytes < 0 && errno == EINTR);

		if (bytes > 0)
			buf[bytes - 1] = '\0';
	}

	fret = wait_for_pid(child);
	/* close the read-end of the pipe */
	close(pipefd[0]);

	return fret;
}