]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/utils.c
lxccontainer: detect if we should send SIGRTMIN+3
[mirror_lxc.git] / src / lxc / utils.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #include "config.h"
25
26 #include <assert.h>
27 #include <dirent.h>
28 #include <errno.h>
29 #include <fcntl.h>
30 #include <libgen.h>
31 #include <stddef.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <sys/mman.h>
37 #include <sys/mount.h>
38 #include <sys/param.h>
39 #include <sys/prctl.h>
40 #include <sys/stat.h>
41 #include <sys/types.h>
42 #include <sys/vfs.h>
43 #include <sys/wait.h>
44
45 #include "log.h"
46 #include "lxclock.h"
47 #include "namespace.h"
48 #include "utils.h"
49
50 #ifndef PR_SET_MM
51 #define PR_SET_MM 35
52 #endif
53
54 #ifndef PR_SET_MM_MAP
55 #define PR_SET_MM_MAP 14
56
/*
 * Fallback declaration of struct prctl_mm_map, compiled only when the system
 * headers do not define PR_SET_MM_MAP (see the surrounding #ifndef).
 * NOTE(review): the field order presumably has to match the kernel's own
 * definition of this structure - confirm against the kernel headers.
 */
struct prctl_mm_map {
	uint64_t start_code;
	uint64_t end_code;
	uint64_t start_data;
	uint64_t end_data;
	uint64_t start_brk;
	uint64_t brk;
	uint64_t start_stack;
	uint64_t arg_start;
	uint64_t arg_end;
	uint64_t env_start;
	uint64_t env_end;
	uint64_t *auxv;
	uint32_t auxv_size;
	uint32_t exe_fd;
};
73 #endif
74
75 #ifndef O_PATH
76 #define O_PATH 010000000
77 #endif
78
79 #ifndef O_NOFOLLOW
80 #define O_NOFOLLOW 00400000
81 #endif
82
83 lxc_log_define(lxc_utils, lxc);
84
85 /*
86 * if path is btrfs, tries to remove it and any subvolumes beneath it
87 */
88 extern bool btrfs_try_remove_subvol(const char *path);
89
/*
 * Recursively delete everything below @dirname and then @dirname itself.
 *
 * @dirname  directory to remove
 * @pdev     device number of the top-level directory being removed
 * @exclude  name of a top-level entry that is only removed when it is empty
 *           (a non-empty one is kept and logged as a snapshot); may be NULL
 * @level    recursion depth, 0 for the top-level call
 * @onedev   when true, entries living on a device other than @pdev are not
 *           descended into (only a btrfs subvolume removal is attempted)
 *
 * Returns 0 on success, -1 if any step failed.
 */
static int _recursive_rmdir(char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	struct dirent *direntp;
	DIR *dir;
	int ret, failed = 0;
	char pathname[MAXPATHLEN];
	bool hadexclude = false;

	dir = opendir(dirname);
	if (!dir) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	/*
	 * readdir() instead of the deprecated readdir_r(): the stream is
	 * private to this call, and a caller-supplied struct dirent may be
	 * too small to hold long file names.
	 */
	while ((direntp = readdir(dir))) {
		struct stat mystat;
		int rc;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			continue;
		}

		/* The excluded entry is only honoured directly below the top. */
		if (!level && exclude && !strcmp(direntp->d_name, exclude)) {
			ret = rmdir(pathname);
			if (ret < 0) {
				switch (errno) {
				case ENOTEMPTY:
					INFO("Not deleting snapshot %s", pathname);
					hadexclude = true;
					break;
				case ENOTDIR:
					ret = unlink(pathname);
					if (ret)
						INFO("%s: failed to remove %s", __func__, pathname);
					break;
				default:
					SYSERROR("%s: failed to rmdir %s", __func__, pathname);
					failed = 1;
					break;
				}
			}
			continue;
		}

		ret = lstat(pathname, &mystat);
		if (ret) {
			ERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			continue;
		}
		if (onedev && mystat.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(pathname))
				INFO("Removed btrfs subvolume at %s\n", pathname);
			continue;
		}
		if (S_ISDIR(mystat.st_mode)) {
			if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0)
				failed = 1;
		} else {
			if (unlink(pathname) < 0) {
				SYSERROR("%s: failed to delete %s", __func__, pathname);
				failed = 1;
			}
		}
	}

	/* A kept snapshot legitimately leaves the directory non-empty. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) {
		ERROR("%s: failed to delete %s", __func__, dirname);
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		ERROR("%s: failed to close directory %s", __func__, dirname);
		failed = 1;
	}

	return failed ? -1 : 0;
}
182
/* overlayfs reports one of two different superblock magic values */
#define OVERLAYFS_SUPER_MAGIC 0x794c764f
#define OVERLAY_SUPER_MAGIC 0x794c7630
/*
 * Tell whether @path is on an overlayfs mount. st_dev is unreliable on
 * overlayfs, so lxc_rmdir_onedev() skips its one-device check there.
 * A path that cannot be statfs()'d is reported as "not overlayfs".
 */
static bool is_native_overlayfs(const char *path)
{
	struct statfs fsinfo;

	if (statfs(path, &fsinfo) < 0)
		return false;

	return fsinfo.f_type == OVERLAYFS_SUPER_MAGIC ||
	       fsinfo.f_type == OVERLAY_SUPER_MAGIC;
}
201
/*
 * Remove @path recursively without crossing onto other devices, except on
 * overlayfs where the device check is disabled. @exclude is handed through
 * to _recursive_rmdir(). A @path that does not exist counts as success.
 *
 * Returns 0 on success, -1 if there were any failures.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat st;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &st) == 0)
		return _recursive_rmdir(path, st.st_dev, exclude, 0, onedev);

	if (errno == ENOENT)
		return 0;

	ERROR("%s: failed to stat %s", __func__, path);
	return -1;
}
221
/*
 * Parse @arg as an unsigned 16-bit number in the given @base and store it
 * in @val (borrowed from iproute2).
 *
 * Returns 0 on success. Returns -1 and leaves @val untouched when @arg is
 * NULL or empty, has trailing garbage, or does not fit into 16 bits.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || *arg == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);

	if (errno != 0)
		return -1;
	/* nothing consumed, or something left over after the number */
	if (end == NULL || end == arg || *end != '\0')
		return -1;
	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;
	return 0;
}
240
/*
 * Create @dir and every missing parent directory with permissions @mode,
 * similar to "mkdir -p". Components that already exist are not an error.
 *
 * Returns 0 on success, -1 when a directory cannot be created or memory
 * runs out.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* dir: start of the next component, tmp: its end */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		/* everything in front of the current component */
		makeme = strndup(orig, dir - orig);
		if (!makeme)
			return -1;

		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
263
264 char *get_rundir()
265 {
266 char *rundir;
267 const char *homedir;
268
269 if (geteuid() == 0) {
270 rundir = strdup(RUNTIME_PATH);
271 return rundir;
272 }
273
274 rundir = getenv("XDG_RUNTIME_DIR");
275 if (rundir) {
276 rundir = strdup(rundir);
277 return rundir;
278 }
279
280 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
281 homedir = getenv("HOME");
282 if (!homedir) {
283 ERROR("HOME isn't set in the environment.");
284 return NULL;
285 }
286
287 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
288 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
289
290 return rundir;
291 }
292
/*
 * Wait until child @pid has terminated, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns 0 only when the child exited normally with status 0; returns -1
 * when waiting fails or the child ended any other way.
 */
int wait_for_pid(pid_t pid)
{
	int status;
	pid_t reaped;

	for (;;) {
		reaped = waitpid(pid, &status, 0);
		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;
	return -1;
}
310
/*
 * Wait until child @pid has terminated, restarting waitpid() when it is
 * interrupted by a signal.
 *
 * Returns the raw wait status as filled in by waitpid() (to be decoded with
 * the W* macros), or -1 when waiting fails.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;
	pid_t reaped;

	for (;;) {
		reaped = waitpid(pid, &status, 0);
		if (reaped == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (reaped == pid)
			return status;
	}
}
326
/*
 * write(2) wrapper that restarts the call when it is interrupted by a
 * signal (EINTR). Returns whatever the final write() returned, so short
 * writes are still possible.
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
336
/*
 * read(2) wrapper that restarts the call when it is interrupted by a
 * signal (EINTR). Returns whatever the final read() returned, so short
 * reads are still possible.
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t nread;

	do {
		nread = read(fd, buf, count);
	} while (nread < 0 && errno == EINTR);

	return nread;
}
346
/*
 * Read exactly @count bytes from @fd into @buf and, when @expected_buf is
 * not NULL, verify that they equal @expected_buf.
 *
 * Returns the read() result unchanged when it is <= 0, -1 on a short read,
 * -1 with errno set to EINVAL on a content mismatch, and @count otherwise.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t nread = lxc_read_nointr(fd, buf, count);

	if (nread <= 0)
		return nread;

	if ((size_t)nread != count)
		return -1;

	if (!expected_buf || memcmp(buf, expected_buf, count) == 0)
		return nread;

	errno = EINVAL;
	return -1;
}
361
362 #if HAVE_LIBGNUTLS
363 #include <gnutls/gnutls.h>
364 #include <gnutls/crypto.h>
365
/*
 * Marked as a constructor, so it runs automatically when the library or
 * program is loaded; it calls gnutls_global_init() once up front.
 */
__attribute__((constructor))
static void gnutls_lxc_init(void)
{
	gnutls_global_init();
}
371
372 int sha1sum_file(char *fnam, unsigned char *digest)
373 {
374 char *buf;
375 int ret;
376 FILE *f;
377 long flen;
378
379 if (!fnam)
380 return -1;
381 f = fopen_cloexec(fnam, "r");
382 if (!f) {
383 SYSERROR("Error opening template");
384 return -1;
385 }
386 if (fseek(f, 0, SEEK_END) < 0) {
387 SYSERROR("Error seeking to end of template");
388 fclose(f);
389 return -1;
390 }
391 if ((flen = ftell(f)) < 0) {
392 SYSERROR("Error telling size of template");
393 fclose(f);
394 return -1;
395 }
396 if (fseek(f, 0, SEEK_SET) < 0) {
397 SYSERROR("Error seeking to start of template");
398 fclose(f);
399 return -1;
400 }
401 if ((buf = malloc(flen+1)) == NULL) {
402 SYSERROR("Out of memory");
403 fclose(f);
404 return -1;
405 }
406 if (fread(buf, 1, flen, f) != flen) {
407 SYSERROR("Failure reading template");
408 free(buf);
409 fclose(f);
410 return -1;
411 }
412 if (fclose(f) < 0) {
413 SYSERROR("Failre closing template");
414 free(buf);
415 return -1;
416 }
417 buf[flen] = '\0';
418 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
419 free(buf);
420 return ret;
421 }
422 #endif
423
/*
 * Turn a NULL-terminated va_list of strings into a NULL-terminated argv
 * style array.
 *
 * @ap         argument list; consumed up to and including the NULL
 * @skip       number of leading slots left as NULL for the caller to fill
 * @do_strdup  non-zero to store copies of the strings, zero to store the
 *             original pointers
 *
 * Returns the allocated array, or NULL when out of memory. On failure
 * nothing is leaked: copies made so far are released again.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	size_t pos;
	char **result;

	/* first determine size of argument list, we don't want to reallocate
	 * constantly...
	 */
	va_copy(ap2, ap);
	while (va_arg(ap2, char *))
		count++;
	va_end(ap2);

	result = calloc(count, sizeof(char *));
	if (!result)
		return NULL;

	for (pos = skip; ; pos++) {
		char *arg = va_arg(ap, char *);

		if (!arg)
			break;
		if (do_strdup) {
			arg = strdup(arg);
			if (!arg)
				goto oom;
		}
		result[pos] = arg;
	}

	/* calloc has already set last element to NULL */
	return result;

oom:
	/* Only reached with do_strdup set: slots [skip, pos) hold our copies. */
	while (pos > skip)
		free(result[--pos]);
	free(result);
	return NULL;
}
463
/*
 * Same as lxc_va_arg_list_to_argv() with do_strdup == 0: the returned array
 * stores the argument pointers themselves rather than copies of the strings.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	return (const char**)lxc_va_arg_list_to_argv(ap, skip, 0);
}
468
469 extern struct lxc_popen_FILE *lxc_popen(const char *command)
470 {
471 struct lxc_popen_FILE *fp = NULL;
472 int parent_end = -1, child_end = -1;
473 int pipe_fds[2];
474 pid_t child_pid;
475
476 int r = pipe2(pipe_fds, O_CLOEXEC);
477
478 if (r < 0) {
479 ERROR("pipe2 failure");
480 return NULL;
481 }
482
483 parent_end = pipe_fds[0];
484 child_end = pipe_fds[1];
485
486 child_pid = fork();
487
488 if (child_pid == 0) {
489 /* child */
490 int child_std_end = STDOUT_FILENO;
491
492 if (child_end != child_std_end) {
493 /* dup2() doesn't dup close-on-exec flag */
494 dup2(child_end, child_std_end);
495
496 /* it's safe not to close child_end here
497 * as it's marked close-on-exec anyway
498 */
499 } else {
500 /*
501 * The descriptor is already the one we will use.
502 * But it must not be marked close-on-exec.
503 * Undo the effects.
504 */
505 if (fcntl(child_end, F_SETFD, 0) != 0) {
506 SYSERROR("Failed to remove FD_CLOEXEC from fd.");
507 exit(127);
508 }
509 }
510
511 /*
512 * Unblock signals.
513 * This is the main/only reason
514 * why we do our lousy popen() emulation.
515 */
516 {
517 sigset_t mask;
518 sigfillset(&mask);
519 sigprocmask(SIG_UNBLOCK, &mask, NULL);
520 }
521
522 execl("/bin/sh", "sh", "-c", command, (char *) NULL);
523 exit(127);
524 }
525
526 /* parent */
527
528 close(child_end);
529 child_end = -1;
530
531 if (child_pid < 0) {
532 ERROR("fork failure");
533 goto error;
534 }
535
536 fp = calloc(1, sizeof(*fp));
537 if (!fp) {
538 ERROR("failed to allocate memory");
539 goto error;
540 }
541
542 fp->f = fdopen(parent_end, "r");
543 if (!fp->f) {
544 ERROR("fdopen failure");
545 goto error;
546 }
547
548 fp->child_pid = child_pid;
549
550 return fp;
551
552 error:
553
554 if (fp) {
555 if (fp->f) {
556 fclose(fp->f);
557 parent_end = -1; /* so we do not close it second time */
558 }
559
560 free(fp);
561 }
562
563 if (parent_end != -1)
564 close(parent_end);
565
566 return NULL;
567 }
568
569 extern int lxc_pclose(struct lxc_popen_FILE *fp)
570 {
571 FILE *f = NULL;
572 pid_t child_pid = 0;
573 int wstatus = 0;
574 pid_t wait_pid;
575
576 if (fp) {
577 f = fp->f;
578 child_pid = fp->child_pid;
579 /* free memory (we still need to close file stream) */
580 free(fp);
581 fp = NULL;
582 }
583
584 if (!f || fclose(f)) {
585 ERROR("fclose failure");
586 return -1;
587 }
588
589 do {
590 wait_pid = waitpid(child_pid, &wstatus, 0);
591 } while (wait_pid == -1 && errno == EINTR);
592
593 if (wait_pid == -1) {
594 ERROR("waitpid failure");
595 return -1;
596 }
597
598 return wstatus;
599 }
600
/*
 * Return a newly allocated copy of @haystack in which every non-overlapping
 * occurrence of @needle (searched left to right) is replaced by
 * @replacement.
 *
 * An empty @needle matches nothing, so the result is a plain copy of
 * @haystack. Returns NULL when out of memory. The caller frees the result.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t matches = 0;
	size_t result_len;
	const char *cur, *hit;
	char *result, *out;

	/* Pass 1: count the matches so the result can be sized exactly. */
	if (needle_len > 0) {
		for (cur = haystack; (hit = strstr(cur, needle)); cur = hit + needle_len)
			matches++;
	}

	result_len = strlen(haystack) - matches * needle_len + matches * replacement_len;
	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Pass 2: copy the text between matches and splice in replacements. */
	out = result;
	cur = haystack;
	while (matches-- > 0) {
		size_t part_len;

		hit = strstr(cur, needle);
		part_len = (size_t)(hit - cur);
		memcpy(out, cur, part_len);
		out += part_len;
		memcpy(out, replacement, replacement_len);
		out += replacement_len;
		cur = hit + needle_len;
	}

	/* the tail after the last match, including the terminating NUL */
	strcpy(out, cur);

	return result;
}
649
/*
 * Return true when @needle equals one of the strings in the NULL-terminated
 * array @haystack. A NULL @haystack contains nothing.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	const char **entry;

	if (!haystack)
		return false;

	for (entry = haystack; *entry != NULL; entry++) {
		if (strcmp(needle, *entry) == 0)
			return true;
	}

	return false;
}
657
/*
 * Concatenate the strings of the NULL-terminated array @parts, putting @sep
 * between neighbours and, when @use_as_prefix is true, also in front of the
 * first one. A NULL @parts is treated like an empty array.
 *
 * Returns a newly allocated string, or NULL when out of memory.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;
	const char **p;
	char *result, *out;

	/* calculate new string length */
	for (p = parts; p && *p; p++) {
		if (p != parts)
			result_len += sep_len;
		result_len += strlen(*p);
	}

	result = calloc(result_len + 1, 1);
	if (!result)
		return NULL;

	/*
	 * Append at a moving cursor instead of strcat(), which would rescan
	 * the whole result for every part.
	 */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}
	for (p = parts; p && *p; p++) {
		size_t part_len = strlen(*p);

		if (p != parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}
		memcpy(out, *p, part_len);
		out += part_len;
	}

	/* calloc() already provided the terminating NUL */
	return result;
}
683
/*
 * Split @path at '/' and resolve "." and ".." components.
 *
 * "." is dropped. ".." drops itself together with the component kept just
 * before it; a ".." with nothing in front of it is simply dropped.
 *
 * Returns the NULL-terminated array of remaining components (to be freed
 * by the caller), or NULL when splitting fails.
 */
char **lxc_normalize_path(const char *path)
{
	char **components;
	size_t rd;
	size_t kept = 0;

	components = lxc_string_split(path, '/');
	if (!components)
		return NULL;

	/* Compact in place: slots [0, kept) hold the components kept so far. */
	for (rd = 0; components[rd]; rd++) {
		char *cur = components[rd];

		if (strcmp(cur, ".") == 0) {
			free(cur);
			continue;
		}

		if (strcmp(cur, "..") == 0) {
			free(cur);
			if (kept > 0)
				free(components[--kept]);
			continue;
		}

		components[kept++] = cur;
	}
	components[kept] = NULL;

	return components;
}
718
/*
 * Return a newly allocated string "<first>/<second>". No extra '/' is
 * inserted when @second already begins with one. Returns NULL when out of
 * memory.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	bool need_slash = second[0] != '/';
	size_t len = strlen(first) + strlen(second) + 1 + (need_slash ? 1 : 0);
	char *joined;

	joined = calloc(1, len);
	if (!joined)
		return NULL;

	if (need_slash)
		snprintf(joined, len, "%s/%s", first, second);
	else
		snprintf(joined, len, "%s%s", first, second);

	return joined;
}
737
/*
 * Return true when @needle equals one of the tokens of @haystack, a list
 * separated by the character @_sep. Empty tokens (leading, trailing or
 * repeated separators) are ignored, so an empty @needle never matches.
 * A NULL @needle or @haystack yields false.
 *
 * The list is scanned in place; no copy of @haystack is made.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	char sep[2] = { _sep, '\0' };
	size_t needle_len;
	const char *cur;

	if (!haystack || !needle)
		return false;

	needle_len = strlen(needle);

	for (cur = haystack; ; ) {
		size_t token_len;

		/* skip separators in front of the next token */
		cur += strspn(cur, sep);
		if (*cur == '\0')
			break;

		token_len = strcspn(cur, sep);
		if (token_len == needle_len && memcmp(cur, needle, token_len) == 0)
			return true;

		cur += token_len;
	}

	return false;
}
755
/*
 * Split @string at every occurrence of the character @_sep.
 *
 * Empty tokens are skipped (strtok_r() semantics). Returns a newly
 * allocated, NULL-terminated array of newly allocated strings; for a NULL
 * or token-less @string the array contains only the terminator. Returns
 * NULL with errno preserved when out of memory. Free the result with
 * lxc_free_array(result, free).
 */
char **lxc_string_split(const char *string, char _sep)
{
	char *token, *str, *copy, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **result = NULL;
	char **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input; work on a heap copy so that long
	 * inputs cannot overflow the stack */
	copy = strdup(string);
	if (!copy)
		return NULL;

	for (str = copy; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}
	free(copy);

	/* no tokens at all: nothing was allocated yet, hand out a properly
	 * terminated empty array */
	if (!result)
		return calloc(1, sizeof(char *));

	/* if we allocated too much, reduce it; should shrinking fail, the
	 * larger array is still valid and NULL-terminated */
	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	return shrunk ? shrunk : result;

error_out:
	saved_errno = errno;
	free(copy);
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
788
/*
 * Like lxc_string_split(), but additionally strips leading and trailing
 * spaces and tabs from every token. A token consisting only of blanks is
 * stored as an empty string.
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings; for a NULL or token-less @string the array contains only the
 * terminator. Returns NULL with errno preserved when out of memory.
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char *token, *str, *copy, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **result = NULL;
	char **shrunk;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;
	size_t i = 0;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() and the trimming below modify the input; work on a heap
	 * copy so that long inputs cannot overflow the stack */
	copy = strdup(string);
	if (!copy)
		return NULL;

	for (str = copy; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		/* trim in front ... */
		while (token[0] == ' ' || token[0] == '\t')
			token++;
		/* ... and at the end */
		i = strlen(token);
		while (i > 0 && (token[i - 1] == ' ' || token[i - 1] == '\t')) {
			token[i - 1] = '\0';
			i--;
		}
		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}
	free(copy);

	/* no tokens at all: nothing was allocated yet, hand out a properly
	 * terminated empty array */
	if (!result)
		return calloc(1, sizeof(char *));

	/* if we allocated too much, reduce it; should shrinking fail, the
	 * larger array is still valid and NULL-terminated */
	shrunk = realloc(result, (result_count + 1) * sizeof(char *));
	return shrunk ? shrunk : result;

error_out:
	saved_errno = errno;
	free(copy);
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
829
830 void lxc_free_array(void **array, lxc_free_fn element_free_fn)
831 {
832 void **p;
833 for (p = array; p && *p; p++)
834 element_free_fn(*p);
835 free((void*)array);
836 }
837
/*
 * Make sure the pointer array *@array has room for @new_size elements plus
 * a terminating NULL slot.
 *
 * @array               in/out: the array, may point to NULL initially
 * @capacity            in/out: number of slots currently allocated
 * @new_size            number of elements the caller wants to store
 * @capacity_increment  granularity in which the capacity grows
 *
 * Newly added slots are zeroed. Returns 0 on success. Returns -1 and leaves
 * *@array and *@capacity untouched when growing is needed but
 * @capacity_increment is 0, the size would overflow, or memory runs out.
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	new_capacity = *capacity;
	if (new_size + 1 <= new_capacity)
		return 0; /* array has sufficient elements */

	/* a zero increment could never reach the requested size */
	if (capacity_increment == 0)
		return -1;

	while (new_size + 1 > new_capacity) {
		if (new_capacity > (size_t)-1 - capacity_increment)
			return -1;
		new_capacity += capacity_increment;
	}

	/* guard the byte count handed to realloc() against overflow */
	if (new_capacity > (size_t)-1 / sizeof(void *))
		return -1;

	new_array = realloc(*array, new_capacity * sizeof(void *));
	if (!new_array)
		return -1;

	memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
	*array = new_array;
	*capacity = new_capacity;

	return 0;
}
866
/*
 * Count the elements of a NULL-terminated pointer array, not including the
 * terminator. A NULL @array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t len = 0;

	if (!array)
		return 0;

	while (array[len] != NULL)
		len++;

	return len;
}
877
/*
 * Create or truncate @filename and write @count bytes from @buf to it,
 * followed by a newline when @add_newline is set.
 *
 * Returns 0 on success. Returns -1 when the file cannot be opened or when
 * any write fails or is short; errno is left as set by the failing call.
 */
int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
{
	int fd, saved_errno;
	ssize_t written;
	bool ok = false;

	fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
	if (fd < 0)
		return -1;

	written = lxc_write_nointr(fd, buf, count);
	if (written >= 0 && (size_t)written == count)
		ok = !add_newline || lxc_write_nointr(fd, "\n", 1) == 1;

	if (ok) {
		close(fd);
		return 0;
	}

	/* close() must not clobber the errno of the failed write */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return -1;
}
905
/*
 * Read from @filename.
 *
 * With a usable buffer (@buf != NULL and @count > 0) the buffer is zeroed
 * and filled by a single read() of at most @count bytes. Without one, the
 * file is read to its end and only the number of bytes is counted.
 *
 * Returns the number of bytes read (or counted), or -1 when the file cannot
 * be opened or reading fails; errno is preserved across the close().
 */
int lxc_read_from_file(const char *filename, void* buf, size_t count)
{
	int fd, saved_errno;
	ssize_t ret;

	fd = open(filename, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return -1;

	if (buf && count) {
		memset(buf, 0, count);
		ret = read(fd, buf, count);
	} else {
		char scratch[100];
		size_t total = 0;

		/* nobody wants the data, just measure how much there is */
		while ((ret = read(fd, scratch, 100)) > 0)
			total += ret;
		if (ret >= 0)
			ret = total;
	}

	if (ret < 0)
		ERROR("read %s: %s", filename, strerror(errno));

	saved_errno = errno;
	close(fd);
	errno = saved_errno;
	return ret;
}
935
/*
 * Grow @array, which holds @count elements, by one slot and store NULL in
 * it. With @count == 0 the array is returned unchanged.
 *
 * Returns the (possibly moved) array. When growing fails, all @count
 * elements and the array itself are freed and NULL is returned.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **grown;
	size_t i;

	if (count == 0)
		return array;

	grown = realloc(array, (count + 1) * sizeof(*array));
	if (grown) {
		grown[count] = NULL;
		return grown;
	}

	/* realloc() failed, so the old array is still ours to release */
	for (i = 0; i < count; i++)
		free(array[i]);
	free(array);
	return NULL;
}
955
/*
 * Produce a seed for the rand() family, taken from /dev/urandom. When the
 * device cannot be opened or read, the seed falls back to time + pid.
 *
 * With @srand_it set, srand() is called with the seed as well. The seed is
 * returned in either case.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom;

	urandom = fopen("/dev/urandom", "r");
	if (urandom) {
		int ret = fread(&seed, sizeof(seed), 1, urandom);

		if (ret != 1)
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
977
/*
 * Translate the uid @orig through /proc/self/uid_map: find the line whose
 * second column and range cover @orig and return the matching id counted
 * from that line's first column.
 *
 * Returns 0 when the map cannot be opened or no line covers @orig.
 */
uid_t get_ns_uid(uid_t orig)
{
	char *line = NULL;
	size_t sz = 0;
	uid_t nsid, hostid, range;
	uid_t mapped = 0;
	FILE *map;

	map = fopen("/proc/self/uid_map", "r");
	if (!map)
		return 0;

	while (getline(&line, &sz, map) != -1) {
		if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
			continue;

		/* skip lines whose range does not cover orig */
		if (orig < hostid || hostid + range <= orig)
			continue;

		mapped = nsid + (orig - hostid);
		break;
	}

	fclose(map);
	free(line);
	return mapped;
}
1002
/*
 * Return true when @path can be stat()'d and is a directory (symlinks are
 * followed). Any stat() failure, whatever its cause, yields false.
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode);
}
1014
/*
 * 64-bit FNV-1a hash over @len bytes starting at @buf, continuing from the
 * running hash value @hval.
 *
 * SHA-1 is deliberately not used here so that this does not depend on
 * HAVE_GNUTLS. FNV has good anti collision properties, and neither
 * pre-image resistance nor one-way-ness matter: the goal is only to make a
 * name unique within the 108 bytes of space available.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64
		 * (2^40 + 2^8 + 0xb3), i.e. the sum of shifts 0, 1, 4, 5, 7,
		 * 8 and 40 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
1038
/*
 * Detect whether / is mounted MS_SHARED. The only known way to check that
 * is through /proc/self/mountinfo.
 * Only / is examined. If the container rootfs or mount location is
 * MS_SHARED, but not '/', this goes unnoticed - figuring that out would be
 * too much work to be worth it.
 *
 * Returns 1 when a mountinfo line for "/" carries a "shared:" tag, 0
 * otherwise (also when mountinfo cannot be opened).
 */
#define LINELEN 4096
int detect_shared_rootfs(void)
{
	char line[LINELEN];
	FILE *mounts;
	int shared = 0;

	mounts = fopen("/proc/self/mountinfo", "r");
	if (!mounts)
		return 0;

	while (!shared && fgets(line, LINELEN, mounts)) {
		char *mnt = line;
		char *mnt_end, *rest;
		int skipped;

		/* step over four spaces: the fifth field follows */
		for (skipped = 0; mnt && skipped < 4; skipped++)
			mnt = strchr(mnt + 1, ' ');
		if (!mnt)
			continue;

		mnt_end = strchr(mnt + 1, ' ');
		if (!mnt_end)
			continue;
		*mnt_end = '\0';

		/* only the entry for '/' is of interest */
		if (strcmp(mnt + 1, "/") != 0)
			continue;

		/* this is '/'. is it shared? */
		rest = strchr(mnt_end + 1, ' ');
		if (rest && strstr(rest, "shared:"))
			shared = 1;
	}

	fclose(mounts);
	return shared;
}
1078
/*
 * Move the calling process into namespace @ns of process @pid by opening
 * /proc/<pid>/ns/<ns> and handing the descriptor to setns().
 *
 * Returns true on success, false when the path does not fit, cannot be
 * opened, or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns) {
	char nspath[MAXPATHLEN];
	int nsfd, len, rc;

	/* Switch to new ns */
	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	rc = setns(nsfd, 0);
	if (rc)
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, nsfd);

	/* the descriptor is no longer needed, whatever the outcome */
	close(nsfd);
	return rc == 0;
}
1103
1104 /*
1105 * looking at fs/proc_namespace.c, it appears we can
1106 * actually expect the rootfs entry to very specifically contain
1107 * " - rootfs rootfs "
1108 * IIUC, so long as we've chrooted so that rootfs is not our root,
1109 * the rootfs entry should always be skipped in mountinfo contents.
1110 */
1111 int detect_ramfs_rootfs(void)
1112 {
1113 char buf[LINELEN], *p;
1114 FILE *f;
1115 int i;
1116 char *p2;
1117
1118 f = fopen("/proc/self/mountinfo", "r");
1119 if (!f)
1120 return 0;
1121 while (fgets(buf, LINELEN, f)) {
1122 for (p = buf, i=0; p && i < 4; i++)
1123 p = strchr(p+1, ' ');
1124 if (!p)
1125 continue;
1126 p2 = strchr(p+1, ' ');
1127 if (!p2)
1128 continue;
1129 *p2 = '\0';
1130 if (strcmp(p+1, "/") == 0) {
1131 // this is '/'. is it the ramfs?
1132 p = strchr(p2+1, '-');
1133 if (p && strncmp(p, "- rootfs rootfs ", 16) == 0) {
1134 fclose(f);
1135 return 1;
1136 }
1137 }
1138 }
1139 fclose(f);
1140 return 0;
1141 }
1142
/*
 * Look for an executable named @cmd in the directories listed in $PATH.
 * When @rootfs is not NULL, every directory is looked up below @rootfs.
 *
 * Returns a newly allocated string with the first matching path, or NULL
 * when $PATH is unset, nothing matches, or memory runs out.
 */
char *on_path(char *cmd, const char *rootfs) {
	const char *env;
	char *path_copy;
	char *dir;
	char *state = NULL;
	char *found = NULL;
	char cmdpath[MAXPATHLEN];
	int len;

	env = getenv("PATH");
	if (!env)
		return NULL;

	/* strtok_r() modifies its input, so work on a copy */
	path_copy = strdup(env);
	if (!path_copy)
		return NULL;

	for (dir = strtok_r(path_copy, ":", &state); dir;
	     dir = strtok_r(NULL, ":", &state)) {
		if (rootfs)
			len = snprintf(cmdpath, MAXPATHLEN, "%s/%s/%s", rootfs, dir, cmd);
		else
			len = snprintf(cmdpath, MAXPATHLEN, "%s/%s", dir, cmd);

		/* a candidate that does not fit is skipped */
		if (len < 0 || len >= MAXPATHLEN)
			continue;

		if (access(cmdpath, X_OK) == 0) {
			found = strdup(cmdpath);
			break;
		}
	}

	free(path_copy);
	return found;
}
1180
/*
 * Return true when stat() succeeds on @f, i.e. the path exists and can be
 * reached (symlinks are followed); false on any stat() failure.
 */
bool file_exists(const char *f)
{
	struct stat st;

	if (stat(f, &st) != 0)
		return false;

	return true;
}
1187
/*
 * Report whether /proc/self/ns/cgroup exists.
 * NOTE(review): presumably this is how support for cgroup namespaces in the
 * running kernel is detected - confirm.
 */
bool cgns_supported(void)
{
	return file_exists("/proc/self/ns/cgroup");
}
1192
1193 /* historically lxc-init has been under /usr/lib/lxc and under
1194 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1195 */
1196 char *choose_init(const char *rootfs)
1197 {
1198 char *retv = NULL;
1199 const char *empty = "",
1200 *tmp;
1201 int ret, env_set = 0;
1202 struct stat mystat;
1203
1204 if (!getenv("PATH")) {
1205 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1206 SYSERROR("Failed to setenv");
1207 env_set = 1;
1208 }
1209
1210 retv = on_path("init.lxc", rootfs);
1211
1212 if (env_set) {
1213 if (unsetenv("PATH"))
1214 SYSERROR("Failed to unsetenv");
1215 }
1216
1217 if (retv)
1218 return retv;
1219
1220 retv = malloc(PATH_MAX);
1221 if (!retv)
1222 return NULL;
1223
1224 if (rootfs)
1225 tmp = rootfs;
1226 else
1227 tmp = empty;
1228
1229 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
1230 if (ret < 0 || ret >= PATH_MAX) {
1231 ERROR("pathname too long");
1232 goto out1;
1233 }
1234
1235 ret = stat(retv, &mystat);
1236 if (ret == 0)
1237 return retv;
1238
1239 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
1240 if (ret < 0 || ret >= PATH_MAX) {
1241 ERROR("pathname too long");
1242 goto out1;
1243 }
1244
1245 ret = stat(retv, &mystat);
1246 if (ret == 0)
1247 return retv;
1248
1249 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
1250 if (ret < 0 || ret >= PATH_MAX) {
1251 ERROR("pathname too long");
1252 goto out1;
1253 }
1254 ret = stat(retv, &mystat);
1255 if (ret == 0)
1256 return retv;
1257
1258 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
1259 if (ret < 0 || ret >= PATH_MAX) {
1260 ERROR("pathname too long");
1261 goto out1;
1262 }
1263 ret = stat(retv, &mystat);
1264 if (ret == 0)
1265 return retv;
1266
1267 /*
1268 * Last resort, look for the statically compiled init.lxc which we
1269 * hopefully bind-mounted in.
1270 * If we are called during container setup, and we get to this point,
1271 * then the init.lxc.static from the host will need to be bind-mounted
1272 * in. So we return NULL here to indicate that.
1273 */
1274 if (rootfs)
1275 goto out1;
1276
1277 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
1278 if (ret < 0 || ret >= PATH_MAX) {
1279 WARN("Nonsense - name /lxc.init.static too long");
1280 goto out1;
1281 }
1282 ret = stat(retv, &mystat);
1283 if (ret == 0)
1284 return retv;
1285
1286 out1:
1287 free(retv);
1288 return NULL;
1289 }
1290
/*
 * Create or truncate @file and write the string @content to it.
 *
 * Returns 0 on success, -1 when the file cannot be opened or the data
 * cannot be written completely.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int ret = 0;

	f = fopen(file, "w");
	if (!f)
		return -1;

	if (fputs(content, f) == EOF)
		ret = -1;

	/*
	 * stdio buffers the data, so a failing write may only show up when
	 * the stream is flushed by fclose().
	 */
	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
1304
/*
 * Return 1 when @path can be stat()'d and is a directory (symlinks are
 * followed), 0 otherwise.
 */
int is_dir(const char *path)
{
	struct stat st;

	if (stat(path, &st) != 0)
		return 0;

	return S_ISDIR(st.st_mode) ? 1 : 0;
}
1313
1314 /*
1315 * Given the '-t' template option to lxc-create, figure out what to
1316 * do. If the template is a full executable path, use that. If it
1317 * is something like 'sshd', then return $templatepath/lxc-sshd.
1318 * On success return the template, on error return NULL.
1319 */
1320 char *get_template_path(const char *t)
1321 {
1322 int ret, len;
1323 char *tpath;
1324
1325 if (t[0] == '/' && access(t, X_OK) == 0) {
1326 tpath = strdup(t);
1327 return tpath;
1328 }
1329
1330 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
1331 tpath = malloc(len);
1332 if (!tpath)
1333 return NULL;
1334 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
1335 if (ret < 0 || ret >= len) {
1336 free(tpath);
1337 return NULL;
1338 }
1339 if (access(tpath, X_OK) < 0) {
1340 SYSERROR("bad template: %s", t);
1341 free(tpath);
1342 return NULL;
1343 }
1344
1345 return tpath;
1346 }
1347
1348 /*
1349 * Sets the process title to the specified title. Note:
1350 * 1. this function requires root to succeed
1351 * 2. it clears /proc/self/environ
1352 * 3. it may not succeed (e.g. if title is longer than /proc/self/environ +
1353 * the original title)
1354 */
1355 int setproctitle(char *title)
1356 {
1357 static char *proctitle = NULL;
1358 char buf[2048], *tmp;
1359 FILE *f;
1360 int i, len, ret = 0;
1361
1362 /* We don't really need to know all of this stuff, but unfortunately
1363 * PR_SET_MM_MAP requires us to set it all at once, so we have to
1364 * figure it out anyway.
1365 */
1366 unsigned long start_data, end_data, start_brk, start_code, end_code,
1367 start_stack, arg_start, arg_end, env_start, env_end,
1368 brk_val;
1369 struct prctl_mm_map prctl_map;
1370
1371 f = fopen_cloexec("/proc/self/stat", "r");
1372 if (!f) {
1373 return -1;
1374 }
1375
1376 tmp = fgets(buf, sizeof(buf), f);
1377 fclose(f);
1378 if (!tmp) {
1379 return -1;
1380 }
1381
1382 /* Skip the first 25 fields, column 26-28 are start_code, end_code,
1383 * and start_stack */
1384 tmp = strchr(buf, ' ');
1385 for (i = 0; i < 24; i++) {
1386 if (!tmp)
1387 return -1;
1388 tmp = strchr(tmp+1, ' ');
1389 }
1390 if (!tmp)
1391 return -1;
1392
1393 i = sscanf(tmp, "%lu %lu %lu", &start_code, &end_code, &start_stack);
1394 if (i != 3)
1395 return -1;
1396
1397 /* Skip the next 19 fields, column 45-51 are start_data to arg_end */
1398 for (i = 0; i < 19; i++) {
1399 if (!tmp)
1400 return -1;
1401 tmp = strchr(tmp+1, ' ');
1402 }
1403
1404 if (!tmp)
1405 return -1;
1406
1407 i = sscanf(tmp, "%lu %lu %lu %lu %lu %lu %lu",
1408 &start_data,
1409 &end_data,
1410 &start_brk,
1411 &arg_start,
1412 &arg_end,
1413 &env_start,
1414 &env_end);
1415 if (i != 7)
1416 return -1;
1417
1418 /* Include the null byte here, because in the calculations below we
1419 * want to have room for it. */
1420 len = strlen(title) + 1;
1421
1422 /* If we don't have enough room by just overwriting the old proctitle,
1423 * let's allocate a new one.
1424 */
1425 if (len > arg_end - arg_start) {
1426 void *m;
1427 m = realloc(proctitle, len);
1428 if (!m)
1429 return -1;
1430 proctitle = m;
1431
1432 arg_start = (unsigned long) proctitle;
1433 }
1434
1435 arg_end = arg_start + len;
1436
1437 brk_val = syscall(__NR_brk, 0);
1438
1439 prctl_map = (struct prctl_mm_map) {
1440 .start_code = start_code,
1441 .end_code = end_code,
1442 .start_stack = start_stack,
1443 .start_data = start_data,
1444 .end_data = end_data,
1445 .start_brk = start_brk,
1446 .brk = brk_val,
1447 .arg_start = arg_start,
1448 .arg_end = arg_end,
1449 .env_start = env_start,
1450 .env_end = env_end,
1451 .auxv = NULL,
1452 .auxv_size = 0,
1453 .exe_fd = -1,
1454 };
1455
1456 ret = prctl(PR_SET_MM, PR_SET_MM_MAP, (long) &prctl_map, sizeof(prctl_map), 0);
1457 if (ret == 0)
1458 strcpy((char*)arg_start, title);
1459 else
1460 INFO("setting cmdline failed - %s", strerror(errno));
1461
1462 return ret;
1463 }
1464
/*
 * @path:    a pathname where / replaced with '\0'.
 * @offsetp: pointer to int showing which path segment was last seen.
 *           Updated on return to reflect the next segment.
 * @fulllen: full original path length.
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int offset = *offsetp;

	if (offset >= fulllen)
		return NULL;

	/* Step over the rest of the current segment. The bound is checked
	 * before the byte is read so we never look past @fulllen. */
	while (offset < fulllen && path[offset] != '\0')
		offset++;
	/* Step over the separator(s) that follow it. */
	while (offset < fulllen && path[offset] == '\0')
		offset++;

	*offsetp = offset;
	return (offset < fulllen) ? &path[offset] : NULL;
}
1487
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * @subdir counts as a subdir when it equals @dir, or when it starts with
 * @dir and the match ends on a path separator. An empty @dir (@len == 0)
 * places no constraint and always matches.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	/* Guard against reading dir[-1] below. */
	if (len == 0)
		return true;

	if (subdirlen < len)
		return false;
	if (strncmp(subdir, dir, len) != 0)
		return false;
	/* @dir itself ends in '/', so the prefix match is already aligned on
	 * a component boundary. */
	if (dir[len - 1] == '/')
		return true;
	if (subdir[len] == '/' || subdirlen == len)
		return true;
	return false;
}
1506
/*
 * Inspect the already opened @fd.
 *
 * Returns -ENOENT when fstat() fails, -ELOOP when the fd refers to a
 * symbolic link, and 0 when the fd is fine.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
1521
/*
 * Open a file or directory, provided that it contains no symlinks.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @dirfd:    directory fd that @nextpath is resolved against
 * @nextpath: single path component to open
 *
 * Returns the new fd on success. On failure a negative value is returned
 * and errno describes the reason (ELOOP if @nextpath is a symlink).
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int newfd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
	if (newfd >= 0) /* was not a symlink, all good */
		return newfd;

	if (errno == ELOOP)
		return newfd;

	if (errno == EPERM || errno == EACCES) {
		/* we're not root (cause we got EPERM) so
		   try opening with O_PATH */
		newfd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
		if (newfd >= 0) {
			/* O_PATH will return an fd for symlinks. We know
			 * nextpath wasn't a symlink at last openat, so if fd
			 * is now a link, then something fishy is going on
			 */
			int ret = check_symlink(newfd);
			if (ret < 0) {
				close(newfd);
				newfd = ret;
				/* Callers inspect errno (e.g. for ELOOP);
				 * make it agree with the returned code
				 * instead of leaving a stale value. */
				errno = -ret;
			}
		}
	}

	return newfd;
}
1556
/*
 * Open a path intended as a mount point, walking it one component at a
 * time so that no component below @prefix_skip may be a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target:      path to be opened
 * @prefix_skip: leading part of @target in which symbolic links are
 *               ignored (this would be the container's rootfs). When NULL
 *               or empty, the walk starts at "/".
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int offset, fd, target_len, idx;
	char *segments = NULL;

	target_len = strlen(target);

	if (!prefix_skip || strlen(prefix_skip) == 0) {
		prefix_skip = "/";
		offset = 0;
	} else {
		/* make sure prefix-skip makes sense */
		offset = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, offset)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
				target, prefix_skip);
			return -EINVAL;
		}
		/*
		 * get_nextpath() wants its offset to sit on, or before, a
		 * separator (already turned into \0), so step back by one.
		 */
		if (offset)
			offset--;
	}

	/* Work on a private copy in which every '/' becomes a terminator */
	segments = strdup(target);
	if (!segments) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}
	for (idx = 0; idx < target_len; idx++) {
		if (segments[idx] == '/')
			segments[idx] = '\0';
	}

	fd = open(prefix_skip, O_RDONLY);
	if (fd < 0)
		goto out;

	for (;;) {
		int nextfd, open_errno;
		char *component;

		component = get_nextpath(segments, &offset, target_len);
		if (!component)
			break;

		nextfd = open_if_safe(fd, component);
		/* close() may clobber errno, so remember it first */
		open_errno = errno;
		close(fd);
		fd = nextfd;
		if (nextfd < 0) {
			errno = open_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);
			break;
		}
	}

out:
	free(segments);
	return fd;
}
1632
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs.  (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * The target (and, for relative bind mounts, the source) is opened with
 * open_without_symlink() and the mount is performed on the matching
 * /proc/self/fd/<fd> path.
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int srcfd = -1, destfd, ret, saved_errno;
	/* only needs enough for /proc/self/fd/<fd> */
	char srcbuf[50], destbuf[50];
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("this is a relative bind mount");
		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		/* ret == sizeof(srcbuf) already means the output was truncated */
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Out of memory");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			/* keep the errno of the failed open for the caller */
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}
		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);
		close(destfd);
		ERROR("Out of memory");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	/* the close() calls below must not hide mount()'s errno */
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);
	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		SYSERROR("Failed to mount %s onto %s", src, dest);
		return ret;
	}

	return 0;
}
1698
/*
 * Mount a proc under @rootfs if proc self points to a pid other than
 * my own.  This is needed to have a known-good proc mount for setting
 * up LSMs both at container startup and attach.
 *
 * @rootfs : the rootfs where proc should be mounted; NULL is treated like
 *           the empty string, i.e. proc is mounted on /proc
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 *
 * NOTE: not to be called from inside the container namespace!
 */
int mount_proc_if_needed(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int linklen, ret;
	int mypid;

	if (!rootfs)
		rootfs = "";

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}
	memset(link, 0, sizeof(link));
	/* readlink() does not NUL-terminate; reserve the last (zeroed) byte
	 * so that link is always a valid string for INFO() and atoi(). */
	linklen = readlink(path, link, sizeof(link) - 1);
	mypid = (int)getpid();
	INFO("I am %d, /proc/self points to '%s'", mypid, link);
	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}
	if (linklen < 0) /* /proc not mounted */
		goto domount;
	if (atoi(link) != mypid) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}
	/* the right proc is already mounted */
	return 0;

domount:
	if (!strcmp(rootfs,"")) /* no rootfs given */
		ret = mount("proc", path, "proc", 0, NULL);
	else
		ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);

	if (ret < 0)
		return -1;

	INFO("Mounted /proc in container for security transition");
	return 1;
}
1754
/*
 * Open /dev/null for reading and writing.
 *
 * Returns the new fd, or the negative result of open() after logging
 * the error.
 */
int open_devnull(void)
{
	int fd;

	fd = open("/dev/null", O_RDWR);
	if (fd >= 0)
		return fd;

	SYSERROR("Can't open /dev/null");
	return fd;
}
1764
/*
 * Duplicate @fd onto file descriptors 0, 1 and 2, in that order.
 *
 * Returns 0 on success, -1 if @fd is negative or any dup2() fails
 * (descriptors already replaced are left as they are).
 */
int set_stdfds(int fd)
{
	int target;

	if (fd < 0)
		return -1;

	for (target = 0; target <= 2; target++) {
		if (dup2(fd, target) < 0)
			return -1;
	}

	return 0;
}
1779
/*
 * Point file descriptors 0, 1 and 2 at /dev/null.
 *
 * Returns 0 on success, -1 if /dev/null could not be opened or one of
 * the descriptors could not be replaced.
 */
int null_stdfds(void)
{
	int ret = -1;
	int fd = open_devnull();

	if (fd >= 0) {
		ret = set_stdfds(fd);
		/* If a standard descriptor was closed beforehand, open() may
		 * have handed us 0, 1 or 2 itself; closing it would undo the
		 * redirection we just set up. */
		if (fd > STDERR_FILENO)
			close(fd);
	}

	return ret;
}
1792
/*
 * Count the lines of the file @fn by reading it with getline() until the
 * first failure or end of file.
 *
 * Returns the number of lines read, or -1 if the file cannot be opened.
 */
int lxc_count_file_lines(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	int count;
	FILE *fp;

	fp = fopen_cloexec(fn, "r");
	if (!fp)
		return -1;

	for (count = 0; getline(&buf, &cap, fp) != -1; count++)
		;

	free(buf);
	fclose(fp);
	return count;
}
1814
/*
 * Map @length bytes of @fd (starting at @offset) so that the mapping is
 * followed by at least one zero byte and can therefore be handed to
 * ordinary string functions.
 *
 * @addr, @prot, @flags, @fd and @offset are passed on to mmap(); MAP_FIXED
 * is added to @flags for the file mapping.
 *
 * Returns the mapping, or MAP_FAILED on error. Release it with
 * lxc_strmunmap() using the same @length.
 */
void *lxc_strmmap(void *addr, size_t length, int prot, int flags, int fd,
		  off_t offset)
{
	void *region, *filemap;

	/* Step 1: reserve length + 1 bytes of zero-filled anonymous memory. */
	region = mmap(addr, length + 1, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (region == MAP_FAILED)
		return region;

	/* Step 2: lay the file over the first @length bytes of that region.
	 * The extra trailing byte stays part of the anonymous mapping and
	 * thus remains \0. */
	filemap = mmap(region, length, prot, MAP_FIXED | flags, fd, offset);
	if (filemap != MAP_FAILED)
		return filemap;

	munmap(region, length + 1);
	return filemap;
}
1836
/*
 * Release a mapping created by lxc_strmmap(). @length is the length that
 * was passed to lxc_strmmap(); the extra trailing byte is accounted for
 * here.
 *
 * Returns the result of munmap().
 */
int lxc_strmunmap(void *addr, size_t length)
{
	size_t mapped = length + 1;

	return munmap(addr, mapped);
}
1841
/*
 * Check whether a signal is blocked by a process.
 *
 * @pid:    process to inspect
 * @signal: signal number (1-based)
 *
 * The blocked-signal mask is taken from the "SigBlk:" line of
 * /proc/<pid>/status. Returns true only if that line could be read and
 * the bit for @signal is set; any error yields false.
 */
bool task_blocking_signal(pid_t pid, int signal)
{
	bool bret = false;
	char *line = NULL;
	/* 64 bits are needed to hold real-time signals on every arch. */
	unsigned long long sigblk = 0;
	size_t n = 0;
	int ret;
	FILE *f;
	/* "/proc/" + pid as decimal int + "/status" + \0 fits easily. */
	char status[64];

	/* Signal N lives in bit N - 1; reject values we cannot shift by. */
	if (signal < 1 || signal > 64)
		return bret;

	ret = snprintf(status, sizeof(status), "/proc/%d/status", (int)pid);
	if (ret < 0 || (size_t)ret >= sizeof(status))
		return bret;

	f = fopen(status, "r");
	if (!f)
		return bret;

	while (getline(&line, &n, f) != -1) {
		if (strncmp(line, "SigBlk:\t", 8) != 0)
			continue;

		if (sscanf(line + 8, "%llx", &sigblk) != 1)
			goto out;
		break;
	}

	if (sigblk & (1ULL << (signal - 1)))
		bret = true;

out:
	free(line);
	fclose(f);
	return bret;
}