]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
ovl_rsync: make sure to umount
[mirror_lxc.git] / src / lxc / utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config.h"
25
26 #include <errno.h>
27 #include <unistd.h>
28 #include <stdlib.h>
29 #include <stddef.h>
30 #include <string.h>
31 #include <sys/types.h>
32 #include <sys/vfs.h>
33 #include <sys/stat.h>
34 #include <sys/mman.h>
35 #include <sys/param.h>
36 #include <sys/mount.h>
37 #include <dirent.h>
38 #include <fcntl.h>
39 #include <libgen.h>
40 #include <sys/types.h>
41 #include <sys/wait.h>
42 #include <assert.h>
43 #include <sys/prctl.h>
44
45 #include "utils.h"
46 #include "log.h"
47 #include "lxclock.h"
48 #include "namespace.h"
49
50 #ifndef PR_SET_MM
51 #define PR_SET_MM 35
52 #endif
53
54 #ifndef PR_SET_MM_ARG_START
55 #define PR_SET_MM_ARG_START 8
56 #endif
57
58 #ifndef PR_SET_MM_ARG_END
59 #define PR_SET_MM_ARG_END 9
60 #endif
61
62 #ifndef PR_SET_MM_ENV_START
63 #define PR_SET_MM_ENV_START 10
64 #endif
65
66 #ifndef PR_SET_MM_ENV_END
67 #define PR_SET_MM_ENV_END 11
68 #endif
69
70 lxc_log_define(lxc_utils, lxc);
71
72 /*
73 * if path is btrfs, tries to remove it and any subvolumes beneath it
74 */
75 extern bool btrfs_try_remove_subvol(const char *path);
76
/*
 * Recursively delete everything below @dirname, then @dirname itself.
 *
 * @dirname : directory to remove
 * @pdev    : st_dev of the top-level directory; only consulted when
 *            @onedev is true
 * @exclude : name of an entry directly inside the top-level directory
 *            (@level == 0) that gets special treatment: it is removed only
 *            if it is an empty directory or not a directory at all. May be
 *            NULL.
 * @level   : recursion depth, 0 for the initial call
 * @onedev  : when true, entries whose st_dev differs from @pdev are not
 *            descended into; a btrfs subvolume removal is attempted instead
 *
 * Returns 0 on success, -1 if anything could not be removed.
 */
static int _recursive_rmdir(char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	struct dirent *direntp;
	DIR *dir;
	int ret, failed = 0;
	char pathname[MAXPATHLEN];
	bool hadexclude = false;

	dir = opendir(dirname);
	if (!dir) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	/*
	 * readdir_r() is deprecated; plain readdir() is sufficient because
	 * this directory stream is private to the current call.
	 */
	while ((direntp = readdir(dir))) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			continue;
		}

		/* The excluded entry is only ever matched at the top level. */
		if (!level && exclude && !strcmp(direntp->d_name, exclude)) {
			ret = rmdir(pathname);
			if (ret < 0) {
				switch (errno) {
				case ENOTEMPTY:
					/* Keep it, and remember why the parent
					 * cannot be removed later on. */
					INFO("Not deleting snapshot %s", pathname);
					hadexclude = true;
					break;
				case ENOTDIR:
					ret = unlink(pathname);
					if (ret)
						INFO("%s: failed to remove %s", __func__, pathname);
					break;
				default:
					SYSERROR("%s: failed to rmdir %s", __func__, pathname);
					failed = 1;
					break;
				}
			}
			continue;
		}

		ret = lstat(pathname, &mystat);
		if (ret) {
			ERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			continue;
		}
		if (onedev && mystat.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(pathname))
				INFO("Removed btrfs subvolume at %s\n", pathname);
			continue;
		}
		if (S_ISDIR(mystat.st_mode)) {
			if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0)
				failed = 1;
		} else {
			if (unlink(pathname) < 0) {
				SYSERROR("%s: failed to delete %s", __func__, pathname);
				failed = 1;
			}
		}
	}

	/* A directory that still holds the kept exclude entry is not an error. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) {
		ERROR("%s: failed to delete %s", __func__, dirname);
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		ERROR("%s: failed to close directory %s", __func__, dirname);
		failed = 1;
	}

	return failed ? -1 : 0;
}
169
/* overlayfs has been shipped with two different superblock magic values */
#define OVERLAYFS_SUPER_MAGIC 0x794c764f
#define OVERLAY_SUPER_MAGIC 0x794c7630

/*
 * Report whether @path sits on an overlayfs filesystem.
 *
 * On overlayfs st_dev is unreliable, so lxc_rmdir_onedev() must not
 * restrict itself to a single device there.
 *
 * Returns false when statfs() fails.
 */
static bool is_native_overlayfs(const char *path)
{
	struct statfs fsinfo;

	if (statfs(path, &fsinfo) < 0)
		return false;

	return fsinfo.f_type == OVERLAYFS_SUPER_MAGIC ||
	       fsinfo.f_type == OVERLAY_SUPER_MAGIC;
}
188
/*
 * Recursively remove @path. Unless @path is on overlayfs, entries that live
 * on a different device than @path are not descended into.
 *
 * @exclude is handed through to _recursive_rmdir() unchanged.
 *
 * Returns 0 on success (including when @path does not exist), -1 if there
 * were any failures.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat sb;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &sb) == 0)
		return _recursive_rmdir(path, sb.st_dev, exclude, 0, onedev);

	/* nothing to remove */
	if (errno == ENOENT)
		return 0;

	ERROR("%s: failed to stat %s", __func__, path);
	return -1;
}
208
/*
 * Parse @arg as an unsigned 16-bit number in the given @base
 * (borrowed from iproute2).
 *
 * The whole string must be consumed and the value must fit in 16 bits.
 * On success the value is stored in @val and 0 is returned; otherwise -1
 * is returned and @val is left untouched.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || *arg == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);

	/* conversion error reported by strtoul() */
	if (errno != 0)
		return -1;

	/* no digits at all, or trailing garbage */
	if (end == NULL || end == arg || *end != '\0')
		return -1;

	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;
	return 0;
}
227
/*
 * Create @dir and all of its missing parent directories with mode @mode.
 *
 * The path is walked one component at a time; every prefix ending at a
 * component boundary is passed to mkdir(). Components that already exist
 * (EEXIST) are not treated as errors.
 *
 * Returns 0 on success, -1 on failure (mkdir() error or out of memory).
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* skip separators, then find the end of the next component */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		/* prefix of the original path up to the current component */
		makeme = strndup(orig, dir - orig);
		if (!makeme) {
			SYSERROR("failed to allocate memory");
			return -1;
		}

		/* an empty prefix (first round of a relative path) is skipped */
		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
250
251 char *get_rundir()
252 {
253 char *rundir;
254 const char *homedir;
255
256 if (geteuid() == 0) {
257 rundir = strdup(RUNTIME_PATH);
258 return rundir;
259 }
260
261 rundir = getenv("XDG_RUNTIME_DIR");
262 if (rundir) {
263 rundir = strdup(rundir);
264 return rundir;
265 }
266
267 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
268 homedir = getenv("HOME");
269 if (!homedir) {
270 ERROR("HOME isn't set in the environment.");
271 return NULL;
272 }
273
274 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
275 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
276
277 return rundir;
278 }
279
/*
 * Wait until child @pid has terminated.
 *
 * waitpid() is retried when it is interrupted by a signal.
 *
 * Returns 0 if the child exited normally with status 0, -1 otherwise
 * (including when waitpid() itself fails).
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		int reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}

		if (reaped == pid)
			break;
	}

	return (WIFEXITED(status) && WEXITSTATUS(status) == 0) ? 0 : -1;
}
297
/*
 * Wait until child @pid has terminated and hand back its raw wait status.
 *
 * waitpid() is retried when it is interrupted by a signal.
 *
 * Returns the status as filled in by waitpid() (to be decoded with the
 * W* macros), or -1 if waitpid() fails.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;
	int reaped;

	do {
		reaped = waitpid(pid, &status, 0);
		if (reaped == -1 && errno != EINTR)
			return -1;
	} while (reaped == -1 || reaped != pid);

	return status;
}
313
/*
 * write() wrapper that restarts the call whenever it fails with EINTR.
 *
 * Returns whatever the final write() call returned.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
323
/*
 * read() wrapper that restarts the call whenever it fails with EINTR.
 *
 * Returns whatever the final read() call returned.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t nread;

	do {
		nread = read(fd, buf, count);
	} while (nread < 0 && errno == EINTR);

	return nread;
}
333
/*
 * Read exactly @count bytes from @fd into @buf and, if @expected_buf is
 * not NULL, verify that they match it.
 *
 * Returns:
 *   the result of the read if it was <= 0 (error or end of file),
 *   -1 if fewer than @count bytes were read,
 *   -1 with errno set to EINVAL if the data differs from @expected_buf,
 *   @count otherwise.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t nread = lxc_read_nointr(fd, buf, count);

	if (nread <= 0)
		return nread;

	/* short read */
	if ((size_t)nread != count)
		return -1;

	if (expected_buf == NULL || memcmp(buf, expected_buf, count) == 0)
		return nread;

	errno = EINVAL;
	return -1;
}
348
349 #if HAVE_LIBGNUTLS
350 #include <gnutls/gnutls.h>
351 #include <gnutls/crypto.h>
352
/*
 * Library constructor: initialise GnuTLS before anything else in this
 * file gets a chance to use it.
 */
static void __attribute__((constructor)) gnutls_lxc_init(void)
{
	gnutls_global_init();
}
358
359 int sha1sum_file(char *fnam, unsigned char *digest)
360 {
361 char *buf;
362 int ret;
363 FILE *f;
364 long flen;
365
366 if (!fnam)
367 return -1;
368 f = fopen_cloexec(fnam, "r");
369 if (!f) {
370 SYSERROR("Error opening template");
371 return -1;
372 }
373 if (fseek(f, 0, SEEK_END) < 0) {
374 SYSERROR("Error seeking to end of template");
375 fclose(f);
376 return -1;
377 }
378 if ((flen = ftell(f)) < 0) {
379 SYSERROR("Error telling size of template");
380 fclose(f);
381 return -1;
382 }
383 if (fseek(f, 0, SEEK_SET) < 0) {
384 SYSERROR("Error seeking to start of template");
385 fclose(f);
386 return -1;
387 }
388 if ((buf = malloc(flen+1)) == NULL) {
389 SYSERROR("Out of memory");
390 fclose(f);
391 return -1;
392 }
393 if (fread(buf, 1, flen, f) != flen) {
394 SYSERROR("Failure reading template");
395 free(buf);
396 fclose(f);
397 return -1;
398 }
399 if (fclose(f) < 0) {
400 SYSERROR("Failre closing template");
401 free(buf);
402 return -1;
403 }
404 buf[flen] = '\0';
405 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
406 free(buf);
407 return ret;
408 }
409 #endif
410
/*
 * Turn a NULL-terminated list of char* varargs into a NULL-terminated
 * argv-style array.
 *
 * @ap        : the argument list; it is consumed by this function
 * @skip      : number of leading slots to leave empty (NULL) in the result
 *              so the caller can fill them in afterwards
 * @do_strdup : when non-zero every argument is duplicated with strdup(),
 *              otherwise the original pointers are stored
 *
 * Returns the newly allocated array, or NULL on allocation failure. On
 * failure nothing is leaked: strings that were already duplicated are
 * released again.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	char **result;

	/* first determine size of argument list, we don't want to reallocate
	 * constantly...
	 */
	va_copy(ap2, ap);
	while (1) {
		char* arg = va_arg(ap2, char*);
		if (!arg)
			break;
		count++;
	}
	va_end(ap2);

	result = calloc(count, sizeof(char*));
	if (!result)
		return NULL;

	count = skip;
	while (1) {
		char* arg = va_arg(ap, char*);
		if (!arg)
			break;
		arg = do_strdup ? strdup(arg) : arg;
		if (!arg)
			goto oom;
		result[count++] = arg;
	}

	/* calloc has already set last element to NULL*/
	return result;

oom:
	/* slots skip..count-1 hold copies we own when do_strdup is set */
	if (do_strdup) {
		while (count > skip)
			free(result[--count]);
	}
	free(result);
	return NULL;
}
450
/*
 * Same as lxc_va_arg_list_to_argv() without duplicating the strings; the
 * resulting array therefore points at the caller's original arguments.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	char **argv = lxc_va_arg_list_to_argv(ap, skip, 0);

	return (const char **)argv;
}
455
456 extern struct lxc_popen_FILE *lxc_popen(const char *command)
457 {
458 struct lxc_popen_FILE *fp = NULL;
459 int parent_end = -1, child_end = -1;
460 int pipe_fds[2];
461 pid_t child_pid;
462
463 int r = pipe2(pipe_fds, O_CLOEXEC);
464
465 if (r < 0) {
466 ERROR("pipe2 failure");
467 return NULL;
468 }
469
470 parent_end = pipe_fds[0];
471 child_end = pipe_fds[1];
472
473 child_pid = fork();
474
475 if (child_pid == 0) {
476 /* child */
477 int child_std_end = STDOUT_FILENO;
478
479 if (child_end != child_std_end) {
480 /* dup2() doesn't dup close-on-exec flag */
481 dup2(child_end, child_std_end);
482
483 /* it's safe not to close child_end here
484 * as it's marked close-on-exec anyway
485 */
486 } else {
487 /*
488 * The descriptor is already the one we will use.
489 * But it must not be marked close-on-exec.
490 * Undo the effects.
491 */
492 if (fcntl(child_end, F_SETFD, 0) != 0) {
493 SYSERROR("Failed to remove FD_CLOEXEC from fd.");
494 exit(127);
495 }
496 }
497
498 /*
499 * Unblock signals.
500 * This is the main/only reason
501 * why we do our lousy popen() emulation.
502 */
503 {
504 sigset_t mask;
505 sigfillset(&mask);
506 sigprocmask(SIG_UNBLOCK, &mask, NULL);
507 }
508
509 execl("/bin/sh", "sh", "-c", command, (char *) NULL);
510 exit(127);
511 }
512
513 /* parent */
514
515 close(child_end);
516 child_end = -1;
517
518 if (child_pid < 0) {
519 ERROR("fork failure");
520 goto error;
521 }
522
523 fp = calloc(1, sizeof(*fp));
524 if (!fp) {
525 ERROR("failed to allocate memory");
526 goto error;
527 }
528
529 fp->f = fdopen(parent_end, "r");
530 if (!fp->f) {
531 ERROR("fdopen failure");
532 goto error;
533 }
534
535 fp->child_pid = child_pid;
536
537 return fp;
538
539 error:
540
541 if (fp) {
542 if (fp->f) {
543 fclose(fp->f);
544 parent_end = -1; /* so we do not close it second time */
545 }
546
547 free(fp);
548 }
549
550 if (parent_end != -1)
551 close(parent_end);
552
553 return NULL;
554 }
555
556 extern int lxc_pclose(struct lxc_popen_FILE *fp)
557 {
558 FILE *f = NULL;
559 pid_t child_pid = 0;
560 int wstatus = 0;
561 pid_t wait_pid;
562
563 if (fp) {
564 f = fp->f;
565 child_pid = fp->child_pid;
566 /* free memory (we still need to close file stream) */
567 free(fp);
568 fp = NULL;
569 }
570
571 if (!f || fclose(f)) {
572 ERROR("fclose failure");
573 return -1;
574 }
575
576 do {
577 wait_pid = waitpid(child_pid, &wstatus, 0);
578 } while (wait_pid == -1 && errno == EINTR);
579
580 if (wait_pid == -1) {
581 ERROR("waitpid failure");
582 return -1;
583 }
584
585 return wstatus;
586 }
587
/*
 * Return a newly allocated copy of @haystack in which every
 * non-overlapping occurrence of @needle (scanned left to right) has been
 * replaced by @replacement.
 *
 * An empty @needle matches nothing, so the result is then a plain copy of
 * @haystack.
 *
 * Returns NULL if memory could not be allocated. The caller owns the
 * returned string.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t haystack_len = strlen(haystack);
	size_t matches = 0;
	size_t result_len;
	const char *cursor, *match;
	char *result, *out;

	/*
	 * strstr() finds an empty needle at every position without making
	 * progress, which would loop forever below.
	 */
	if (needle_len == 0) {
		result = malloc(haystack_len + 1);
		if (result)
			memcpy(result, haystack, haystack_len + 1);
		return result;
	}

	/* first pass: count the matches to size the result exactly */
	for (cursor = haystack; (match = strstr(cursor, needle)); cursor = match + needle_len)
		matches++;

	result_len = haystack_len - matches * needle_len + matches * replacement_len;
	result = calloc(1, result_len + 1);
	if (!result)
		return NULL;

	/* second pass: copy the pieces between matches plus the replacement */
	out = result;
	for (cursor = haystack; (match = strstr(cursor, needle)); cursor = match + needle_len) {
		size_t part_len = (size_t)(match - cursor);

		memcpy(out, cursor, part_len);
		out += part_len;
		memcpy(out, replacement, replacement_len);
		out += replacement_len;
	}

	/* whatever follows the last match, including the terminating NUL */
	memcpy(out, cursor, strlen(cursor) + 1);

	return result;
}
636
/*
 * Check whether @needle is one of the strings in the NULL-terminated
 * array @haystack. A NULL @haystack is treated as an empty array.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	size_t i;

	if (!haystack)
		return false;

	for (i = 0; haystack[i]; i++) {
		if (strcmp(needle, haystack[i]) == 0)
			return true;
	}

	return false;
}
644
/*
 * Concatenate the strings of the NULL-terminated array @parts, putting
 * @sep between consecutive elements.
 *
 * @use_as_prefix : when true, @sep is additionally placed in front of the
 *                  result
 *
 * A NULL @parts is treated like an empty array.
 *
 * Returns a newly allocated string owned by the caller, or NULL if memory
 * could not be allocated.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	static const char *no_parts[] = { NULL };
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;
	const char **p;
	char *result, *out;

	if (!parts)
		parts = no_parts;

	/* calculate new string length */
	for (p = parts; *p; p++) {
		if (p != parts)
			result_len += sep_len;
		result_len += strlen(*p);
	}

	/* calloc() provides the terminating NUL */
	result = calloc(result_len + 1, 1);
	if (!result)
		return NULL;

	/*
	 * Append through a write cursor; repeated strcat() would rescan the
	 * growing result for every part.
	 */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}
	for (p = parts; *p; p++) {
		size_t part_len = strlen(*p);

		if (p != parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}
		memcpy(out, *p, part_len);
		out += part_len;
	}

	return result;
}
670
/*
 * Split @path at '/' and resolve "." and ".." components.
 *
 * "." components are dropped, ".." removes the preceding component, and a
 * ".." with nothing in front of it is dropped as well.
 *
 * Returns the NULL-terminated array of remaining components (as produced
 * by lxc_string_split()), or NULL if splitting failed.
 */
char **lxc_normalize_path(const char *path)
{
	char **parts;
	size_t count = 0;
	size_t idx = 0;

	parts = lxc_string_split(path, '/');
	if (!parts)
		return NULL;

	while (parts[count])
		count++;

	/* resolve '.' and '..' */
	while (idx < count) {
		bool is_dot = strcmp(parts[idx], ".") == 0;
		bool is_dotdot = !is_dot && strcmp(parts[idx], "..") == 0;

		if (!is_dot && !is_dotdot) {
			idx++;
			continue;
		}

		if (is_dot || idx == 0) {
			/* eat this element; the move includes the trailing NULL */
			free(parts[idx]);
			memmove(&parts[idx], &parts[idx + 1], sizeof(char *) * (count - idx));
			count--;
			continue;
		}

		/* eat this and the previous element */
		free(parts[idx - 1]);
		free(parts[idx]);
		memmove(&parts[idx - 1], &parts[idx + 1], sizeof(char *) * (count - idx));
		count -= 2;
		idx--;
	}

	return parts;
}
705
/*
 * Concatenate the two path fragments @first and @second, inserting a '/'
 * between them unless @second already starts with one.
 *
 * Returns a newly allocated string owned by the caller, or NULL if memory
 * could not be allocated.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	bool need_slash = second[0] != '/';
	size_t len = strlen(first) + strlen(second) + 1 + (need_slash ? 1 : 0);
	char *joined;

	joined = calloc(1, len);
	if (!joined)
		return NULL;

	if (need_slash)
		snprintf(joined, len, "%s/%s", first, second);
	else
		snprintf(joined, len, "%s%s", first, second);

	return joined;
}
724
/*
 * Check whether @needle is one of the @_sep-separated items of the string
 * @haystack. Empty items are never produced by the tokenizer and hence
 * never match.
 *
 * Returns false if either string is NULL.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	char delim[2] = { _sep, '\0' };
	char *copy, *item, *state = NULL;
	size_t size;

	if (!needle || !haystack)
		return false;

	/* strtok_r() modifies its input, so work on a stack copy */
	size = strlen(haystack) + 1;
	copy = alloca(size);
	memcpy(copy, haystack, size);

	item = strtok_r(copy, delim, &state);
	while (item) {
		if (!strcmp(needle, item))
			return true;
		item = strtok_r(NULL, delim, &state);
	}

	return false;
}
742
/*
 * Split @string at every occurrence of @_sep.
 *
 * Empty tokens are skipped (strtok_r() semantics). A NULL @string, or one
 * that yields no tokens at all, produces an empty array.
 *
 * Returns a newly allocated NULL-terminated array of newly allocated
 * strings, or NULL on allocation failure with errno preserved from the
 * failing call.
 */
char **lxc_string_split(const char *string, char _sep)
{
	char *token, *str, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **result = NULL;
	char **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so tokenize a stack copy */
	str = alloca(strlen(string)+1);
	strcpy(str, string);
	for (; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}

	/*
	 * No tokens at all: realloc(NULL, ...) would hand back uninitialized
	 * memory, i.e. an array without its NULL terminator.
	 */
	if (!result)
		return calloc(1, sizeof(char *));

	/*
	 * if we allocated too much, reduce it; should shrinking fail the
	 * larger array is still valid and NULL-terminated, so return that
	 * instead of leaking it
	 */
	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	return shrunk ? shrunk : result;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
775
/*
 * Split @string at every occurrence of @_sep and strip leading and
 * trailing spaces and tabs from each token.
 *
 * Empty tokens are skipped by the tokenizer (strtok_r() semantics); a
 * token consisting only of blanks is kept as an empty string. A NULL
 * @string, or one that yields no tokens at all, produces an empty array.
 *
 * Returns a newly allocated NULL-terminated array of newly allocated
 * strings, or NULL on allocation failure with errno preserved from the
 * failing call.
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char *token, *str, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **result = NULL;
	char **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;
	size_t i = 0;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so tokenize a stack copy */
	str = alloca(strlen(string)+1);
	strcpy(str, string);
	for (; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		/* trim leading blanks ... */
		while (token[0] == ' ' || token[0] == '\t')
			token++;
		/* ... and trailing ones */
		i = strlen(token);
		while (i > 0 && (token[i - 1] == ' ' || token[i - 1] == '\t')) {
			token[i - 1] = '\0';
			i--;
		}
		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}

	/*
	 * No tokens at all: realloc(NULL, ...) would hand back uninitialized
	 * memory, i.e. an array without its NULL terminator.
	 */
	if (!result)
		return calloc(1, sizeof(char *));

	/*
	 * if we allocated too much, reduce it; should shrinking fail the
	 * larger array is still valid and NULL-terminated, so return that
	 * instead of leaking it
	 */
	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	return shrunk ? shrunk : result;

error_out:
	saved_errno = errno;
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
816
817 void lxc_free_array(void **array, lxc_free_fn element_free_fn)
818 {
819 void **p;
820 for (p = array; p && *p; p++)
821 element_free_fn(*p);
822 free((void*)array);
823 }
824
/*
 * Make sure the pointer array *@array can hold @new_size elements plus a
 * terminating NULL.
 *
 * @array              : in/out, the array (may point to NULL initially)
 * @capacity           : in/out, number of slots currently allocated
 * @new_size           : number of elements the caller wants to store
 * @capacity_increment : step by which the capacity is enlarged
 *
 * Newly added slots are zeroed. If either *@array is NULL or *@capacity is
 * 0, both are reset and the array is allocated from scratch.
 *
 * Returns 0 on success. Returns -1 if the reallocation fails, or - with
 * errno set to EINVAL - if growth is required but @capacity_increment is
 * 0; *@array and *@capacity are left as they are in both cases.
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	new_capacity = *capacity;

	/* array has sufficient elements */
	if (!(new_size + 1 > new_capacity))
		return 0;

	/* a zero step could never reach the requested size */
	if (capacity_increment == 0) {
		errno = EINVAL;
		return -1;
	}

	while (new_size + 1 > new_capacity)
		new_capacity += capacity_increment;

	/* we have to reallocate */
	new_array = realloc(*array, new_capacity * sizeof(void *));
	if (!new_array)
		return -1;

	/* only the freshly added tail needs clearing */
	memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
	*array = new_array;
	*capacity = new_capacity;

	return 0;
}
853
/*
 * Count the elements of a NULL-terminated pointer array, not including
 * the terminator. A NULL @array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t len = 0;

	if (!array)
		return 0;

	while (array[len])
		len++;

	return len;
}
864
/*
 * Replace the contents of @filename (created with mode 0666 if missing)
 * with the @count bytes at @buf, optionally followed by a newline.
 *
 * Returns 0 on success. Returns -1 if the file cannot be opened or if a
 * write fails or is short; errno is preserved across the close().
 */
int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
{
	int fd, saved_errno;
	ssize_t written;

	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
	if (fd < 0)
		return -1;

	written = lxc_write_nointr(fd, buf, count);
	if (written < 0 || (size_t)written != count)
		goto out_error;

	if (add_newline && lxc_write_nointr(fd, "\n", 1) != 1)
		goto out_error;

	close(fd);
	return 0;

out_error:
	/* close() must not clobber the errno of the failed write */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
892
/*
 * Read from @filename.
 *
 * With a buffer (@buf non-NULL and @count non-zero) the buffer is zeroed
 * and a single read() of up to @count bytes is done into it. Without one,
 * the file is read to the end and only the number of bytes is counted.
 *
 * Returns the number of bytes read (or counted), or -1 if the file cannot
 * be opened or reading fails.
 */
int lxc_read_from_file(const char *filename, void* buf, size_t count)
{
	int fd = -1, saved_errno;
	ssize_t ret;

	fd = open(filename, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -1;

	if (buf && count) {
		memset(buf, 0, count);
		ret = read(fd, buf, count);
	} else {
		/* size query: drain the file through a scratch buffer */
		char scratch[100];
		size_t total = 0;

		for (;;) {
			ret = read(fd, scratch, 100);
			if (ret <= 0)
				break;
			total += ret;
		}
		if (ret >= 0)
			ret = total;
	}

	if (ret < 0)
		ERROR("read %s: %s", filename, strerror(errno));

	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return ret;
}
922
/*
 * Grow @array, which holds @count elements, by one slot and store NULL in
 * it so that the array becomes NULL-terminated.
 *
 * With @count == 0 nothing is done and @array is returned unchanged.
 *
 * Returns the (possibly moved) array. If the reallocation fails, all
 * @count elements and the array itself are freed and NULL is returned.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **temp;

	/* Append NULL to the array */
	if (count) {
		temp = realloc(array, (count + 1) * sizeof(*array));
		if (!temp) {
			/* index with the same type as count to avoid a
			 * signed/unsigned mismatch on large arrays */
			size_t i;

			for (i = 0; i < count; i++)
				free(array[i]);
			free(array);
			return NULL;
		}
		array = temp;
		array[count] = NULL;
	}
	return array;
}
942
/*
 * Produce a seed for the pseudo random number generator.
 *
 * The seed is read from /dev/urandom; time + pid serves as the initial
 * value in case /dev/urandom cannot be opened.
 *
 * @srand_it : when true, srand() is called with the seed as well
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom = fopen("/dev/urandom", "r");

	if (urandom != NULL) {
		int items = fread(&seed, sizeof(seed), 1, urandom);

		if (items != 1)
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
964
/*
 * Translate the host uid @orig into the uid it has inside the current
 * user namespace, according to /proc/self/uid_map.
 *
 * Returns the mapped uid, or 0 if the map cannot be opened or contains no
 * range covering @orig.
 */
uid_t get_ns_uid(uid_t orig)
{
	char *line = NULL;
	size_t cap = 0;
	uid_t ns_start, host_start, span;
	uid_t mapped = 0;
	FILE *map = fopen("/proc/self/uid_map", "r");

	if (!map)
		return 0;

	while (getline(&line, &cap, map) != -1) {
		/* each line: <first ns id> <first host id> <length> */
		if (sscanf(line, "%u %u %u", &ns_start, &host_start, &span) != 3)
			continue;

		/* skip ranges that do not contain orig */
		if (orig < host_start || host_start + span <= orig)
			continue;

		mapped = ns_start + (orig - host_start);
		break;
	}

	fclose(map);
	free(line);
	return mapped;
}
989
/*
 * Report whether @path exists and is a directory (symlinks are followed).
 * Any stat() failure, whatever its cause, counts as "no".
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode);
}
1001
/*
 * 64-bit FNV-1a hash of the @len bytes at @buf, continuing from @hval.
 *
 * SHA-1 is deliberately not used here so that there is no dependency on
 * HAVE_GNUTLS. FNV has good anti-collision properties, and neither
 * pre-image resistance nor one-way-ness matter: the goal is merely a name
 * that is unique within the 108 bytes of space available.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64
		 * (2^40 + 2^8 + 0xb3) */
		hval *= 0x100000001b3ULL;
	}

	return hval;
}
1025
/*
 * Detect whether / is mounted MS_SHARED. The only known way to check that
 * is through /proc/self/mountinfo.
 *
 * Only '/' is examined. If the container rootfs or mount location is
 * MS_SHARED but is not '/', this goes unnoticed - working that out would
 * be more effort than it is worth.
 *
 * Returns 1 if a mountinfo entry for '/' carries a "shared:" tag, 0
 * otherwise (also when mountinfo cannot be opened).
 */
#define LINELEN 4096
int detect_shared_rootfs(void)
{
	char line[LINELEN];
	FILE *mountinfo;
	int shared = 0;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo)
		return 0;

	while (!shared && fgets(line, LINELEN, mountinfo)) {
		char *mnt = line;
		char *mnt_end, *rest;
		int skipped;

		/* the mount point is the fifth space-separated field */
		for (skipped = 0; mnt && skipped < 4; skipped++)
			mnt = strchr(mnt + 1, ' ');
		if (!mnt)
			continue;

		mnt_end = strchr(mnt + 1, ' ');
		if (!mnt_end)
			continue;
		*mnt_end = '\0';

		if (strcmp(mnt + 1, "/") != 0)
			continue;

		/* this is '/'. is it shared? */
		rest = strchr(mnt_end + 1, ' ');
		if (rest && strstr(rest, "shared:"))
			shared = 1;
	}

	fclose(mountinfo);
	return shared;
}
1065
/*
 * Move the calling process into namespace @ns (the name of an entry below
 * /proc/<pid>/ns/) of process @pid.
 *
 * Returns true on success, false if the path does not fit, cannot be
 * opened, or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns) {
	char nspath[MAXPATHLEN];
	int nsfd, len, err;

	/* Switch to new ns */
	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	err = setns(nsfd, 0);
	if (err)
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, nsfd);

	/* the descriptor is not needed any more, whatever the outcome */
	close(nsfd);
	return err == 0;
}
1090
1091 /*
1092 * looking at fs/proc_namespace.c, it appears we can
1093 * actually expect the rootfs entry to very specifically contain
1094 * " - rootfs rootfs "
1095 * IIUC, so long as we've chrooted so that rootfs is not our root,
1096 * the rootfs entry should always be skipped in mountinfo contents.
1097 */
1098 int detect_ramfs_rootfs(void)
1099 {
1100 char buf[LINELEN], *p;
1101 FILE *f;
1102 int i;
1103 char *p2;
1104
1105 f = fopen("/proc/self/mountinfo", "r");
1106 if (!f)
1107 return 0;
1108 while (fgets(buf, LINELEN, f)) {
1109 for (p = buf, i=0; p && i < 4; i++)
1110 p = strchr(p+1, ' ');
1111 if (!p)
1112 continue;
1113 p2 = strchr(p+1, ' ');
1114 if (!p2)
1115 continue;
1116 *p2 = '\0';
1117 if (strcmp(p+1, "/") == 0) {
1118 // this is '/'. is it the ramfs?
1119 p = strchr(p2+1, '-');
1120 if (p && strncmp(p, "- rootfs rootfs ", 16) == 0) {
1121 fclose(f);
1122 return 1;
1123 }
1124 }
1125 }
1126 fclose(f);
1127 return 0;
1128 }
1129
/*
 * Look for an executable named @cmd in the directories listed in $PATH.
 * If @rootfs is not NULL, every directory is looked up below @rootfs.
 *
 * Returns a newly allocated string with the first matching path (with the
 * @rootfs prefix, if any), or NULL if $PATH is unset, nothing matches or
 * memory runs out. Candidates too long for MAXPATHLEN are skipped.
 */
char *on_path(char *cmd, const char *rootfs) {
	char candidate[MAXPATHLEN];
	char *path_copy, *dir;
	char *state = NULL;
	char *found = NULL;
	const char *path_env;
	int len;

	path_env = getenv("PATH");
	if (!path_env)
		return NULL;

	/* strtok_r() modifies its input, so tokenize a private copy */
	path_copy = strdup(path_env);
	if (!path_copy)
		return NULL;

	for (dir = strtok_r(path_copy, ":", &state); dir;
	     dir = strtok_r(NULL, ":", &state)) {
		if (rootfs)
			len = snprintf(candidate, MAXPATHLEN, "%s/%s/%s", rootfs, dir, cmd);
		else
			len = snprintf(candidate, MAXPATHLEN, "%s/%s", dir, cmd);

		if (len < 0 || len >= MAXPATHLEN)
			continue;

		if (access(candidate, X_OK) == 0) {
			found = strdup(candidate);
			break;
		}
	}

	free(path_copy);
	return found;
}
1167
/*
 * Report whether @f can be stat()ed, i.e. whether something of any type
 * exists at that path (symlinks are followed).
 */
bool file_exists(const char *f)
{
	struct stat sb;

	if (stat(f, &sb) != 0)
		return false;

	return true;
}
1174
1175 /* historically lxc-init has been under /usr/lib/lxc and under
1176 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1177 */
1178 char *choose_init(const char *rootfs)
1179 {
1180 char *retv = NULL;
1181 const char *empty = "",
1182 *tmp;
1183 int ret, env_set = 0;
1184 struct stat mystat;
1185
1186 if (!getenv("PATH")) {
1187 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1188 SYSERROR("Failed to setenv");
1189 env_set = 1;
1190 }
1191
1192 retv = on_path("init.lxc", rootfs);
1193
1194 if (env_set) {
1195 if (unsetenv("PATH"))
1196 SYSERROR("Failed to unsetenv");
1197 }
1198
1199 if (retv)
1200 return retv;
1201
1202 retv = malloc(PATH_MAX);
1203 if (!retv)
1204 return NULL;
1205
1206 if (rootfs)
1207 tmp = rootfs;
1208 else
1209 tmp = empty;
1210
1211 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
1212 if (ret < 0 || ret >= PATH_MAX) {
1213 ERROR("pathname too long");
1214 goto out1;
1215 }
1216
1217 ret = stat(retv, &mystat);
1218 if (ret == 0)
1219 return retv;
1220
1221 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
1222 if (ret < 0 || ret >= PATH_MAX) {
1223 ERROR("pathname too long");
1224 goto out1;
1225 }
1226
1227 ret = stat(retv, &mystat);
1228 if (ret == 0)
1229 return retv;
1230
1231 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
1232 if (ret < 0 || ret >= PATH_MAX) {
1233 ERROR("pathname too long");
1234 goto out1;
1235 }
1236 ret = stat(retv, &mystat);
1237 if (ret == 0)
1238 return retv;
1239
1240 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
1241 if (ret < 0 || ret >= PATH_MAX) {
1242 ERROR("pathname too long");
1243 goto out1;
1244 }
1245 ret = stat(retv, &mystat);
1246 if (ret == 0)
1247 return retv;
1248
1249 /*
1250 * Last resort, look for the statically compiled init.lxc which we
1251 * hopefully bind-mounted in.
1252 * If we are called during container setup, and we get to this point,
1253 * then the init.lxc.static from the host will need to be bind-mounted
1254 * in. So we return NULL here to indicate that.
1255 */
1256 if (rootfs)
1257 goto out1;
1258
1259 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
1260 if (ret < 0 || ret >= PATH_MAX) {
1261 WARN("Nonsense - name /lxc.init.static too long");
1262 goto out1;
1263 }
1264 ret = stat(retv, &mystat);
1265 if (ret == 0)
1266 return retv;
1267
1268 out1:
1269 free(retv);
1270 return NULL;
1271 }
1272
/*
 * Replace the contents of @file with the string @content.
 *
 * Returns 0 if everything was written, -1 if the file cannot be opened or
 * the data cannot be written completely. Since stdio buffers the output,
 * a write error may only surface when the stream is flushed by fclose(),
 * which is therefore checked as well.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int ret = 0;
	int written;

	f = fopen(file, "w");
	if (!f)
		return -1;

	written = fprintf(f, "%s", content);
	if (written < 0 || (size_t)written != strlen(content))
		ret = -1;

	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
1286
/*
 * Returns 1 if @path exists and is a directory (symlinks are followed),
 * 0 otherwise.
 */
int is_dir(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) ? 1 : 0;
}
1295
1296 /*
1297 * Given the '-t' template option to lxc-create, figure out what to
1298 * do. If the template is a full executable path, use that. If it
1299 * is something like 'sshd', then return $templatepath/lxc-sshd.
1300 * On success return the template, on error return NULL.
1301 */
1302 char *get_template_path(const char *t)
1303 {
1304 int ret, len;
1305 char *tpath;
1306
1307 if (t[0] == '/' && access(t, X_OK) == 0) {
1308 tpath = strdup(t);
1309 return tpath;
1310 }
1311
1312 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
1313 tpath = malloc(len);
1314 if (!tpath)
1315 return NULL;
1316 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
1317 if (ret < 0 || ret >= len) {
1318 free(tpath);
1319 return NULL;
1320 }
1321 if (access(tpath, X_OK) < 0) {
1322 SYSERROR("bad template: %s", t);
1323 free(tpath);
1324 return NULL;
1325 }
1326
1327 return tpath;
1328 }
1329
/*
 * Sets the process title to the specified title. Note:
 *   1. this function requires root to succeed
 *   2. it clears /proc/self/environ
 *   3. it may not succeed (e.g. if title is longer than /proc/self/environ +
 *      the original title); in that case the title is truncated to the
 *      space available
 *
 * The current argument and environment areas are taken from
 * /proc/self/stat, the title is written over the start of the argument
 * area and the new boundaries are registered with prctl(PR_SET_MM, ...).
 *
 * Returns 0 on success, non-zero on failure.
 */
int setproctitle(char *title)
{
	char buf[2048], *tmp;
	FILE *f;
	int i, ret = 0;
	size_t len;
	unsigned long arg_start, arg_end, env_start, env_end;

	f = fopen_cloexec("/proc/self/stat", "r");
	if (!f) {
		return -1;
	}

	tmp = fgets(buf, sizeof(buf), f);
	fclose(f);
	if (!tmp) {
		return -1;
	}

	/* Skip the first 47 fields, column 48-51 are ARG_START and
	 * ARG_END. */
	tmp = strchr(buf, ' ');
	for (i = 0; i < 46; i++) {
		if (!tmp)
			return -1;
		tmp = strchr(tmp+1, ' ');
	}

	if (!tmp)
		return -1;

	i = sscanf(tmp, "%lu %lu %lu %lu", &arg_start, &arg_end, &env_start, &env_end);
	if (i != 4) {
		return -1;
	}

	/* Include the null byte here, because in the calculations below we
	 * want to have room for it. */
	len = strlen(title) + 1;

	/* We're truncating the environment, so we should use at most the
	 * length of the argument + environment for the title. */
	if (len > env_end - arg_start) {
		arg_end = env_end;
		len = env_end - arg_start;

		/* not even room for the terminating null byte */
		if (len == 0)
			return -1;
	} else {
		/* Only truncate the environment if we're actually going to
		 * overwrite part of it. */
		if (len >= arg_end - arg_start) {
			env_start = env_end;
		}

		arg_end = arg_start + len;

		/* check overflow */
		if (arg_end < len || arg_end < arg_start) {
			return -1;
		}

	}

	/*
	 * Copy at most len bytes including the terminator: when the title
	 * had to be truncated above, copying all of it would write past
	 * env_end.
	 */
	memcpy((char *)arg_start, title, len - 1);
	((char *)arg_start)[len - 1] = '\0';

	ret |= prctl(PR_SET_MM, PR_SET_MM_ARG_START, arg_start, 0, 0);
	ret |= prctl(PR_SET_MM, PR_SET_MM_ARG_END, arg_end, 0, 0);
	ret |= prctl(PR_SET_MM, PR_SET_MM_ENV_START, env_start, 0, 0);
	ret |= prctl(PR_SET_MM, PR_SET_MM_ENV_END, env_end, 0, 0);

	return ret;
}
1406
/*
 * Mount a proc under @rootfs if proc self points to a pid other than
 * my own. This is needed to have a known-good proc mount for setting
 * up LSMs both at container startup and attach.
 *
 * @rootfs : the rootfs where proc should be mounted
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 */
int mount_proc_if_needed(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;
	int mypid;

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/*
	 * readlink() does not null-terminate. Reading at most
	 * sizeof(link) - 1 bytes into the zeroed buffer guarantees that
	 * link is a valid string for the logging and atoi() below.
	 */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	mypid = (int)getpid();
	INFO("I am %d, /proc/self points to '%s'", mypid, link);

	/* shorter than the path validated above, so this cannot truncate */
	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (linklen < 0) /* /proc not mounted */
		goto domount;
	if (atoi(link) != mypid) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (mount("proc", path, "proc", 0, NULL))
		return -1;
	INFO("Mounted /proc in container for security transition");
	return 1;
}
1450
/*
 * Redirect stdin, stdout and stderr of the calling process to /dev/null.
 *
 * Returns 0 on success, -1 if /dev/null cannot be opened or one of the
 * descriptors cannot be replaced (descriptors handled before the failing
 * one stay redirected).
 */
int null_stdfds(void)
{
	int nullfd, target;
	int ret = 0;

	nullfd = open("/dev/null", O_RDWR);
	if (nullfd < 0)
		return -1;

	/* descriptors 0, 1 and 2, in that order */
	for (target = 0; target <= 2; target++) {
		if (dup2(nullfd, target) < 0) {
			ret = -1;
			break;
		}
	}

	close(nullfd);
	return ret;
}