]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/utils.c
Merge pull request #1402 from mabes/patch-1
[mirror_lxc.git] / src / lxc / utils.c
CommitLineData
e3642c43
DL
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
e3642c43
DL
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e3642c43
DL
22 */
23
052616eb
ÇO
24#include "config.h"
25
a1e5280d 26#include <dirent.h>
e3642c43 27#include <errno.h>
a1e5280d 28#include <fcntl.h>
dbaf55a3 29#include <grp.h>
a1e5280d 30#include <libgen.h>
d983b93c 31#include <stddef.h>
a1e5280d
CB
32#include <stdio.h>
33#include <stdlib.h>
61a1d519 34#include <string.h>
981f6029 35#include <unistd.h>
e3642c43 36#include <sys/mman.h>
6e4bb2e0 37#include <sys/mount.h>
a1e5280d
CB
38#include <sys/param.h>
39#include <sys/prctl.h>
40#include <sys/stat.h>
9be53773 41#include <sys/types.h>
a1e5280d 42#include <sys/vfs.h>
9be53773 43#include <sys/wait.h>
e3642c43
DL
44
45#include "log.h"
025ed0f3 46#include "lxclock.h"
51d0854c 47#include "namespace.h"
981f6029 48#include "utils.h"
e3642c43 49
5d6ef228
SG
50#ifndef PR_SET_MM
51#define PR_SET_MM 35
52#endif
53
8d2ede58
TA
#ifndef PR_SET_MM_MAP
#define PR_SET_MM_MAP 14

/*
 * Fallback definition, compiled only when the system headers did not
 * already define PR_SET_MM_MAP. Presumably mirrors the kernel's
 * struct prctl_mm_map field for field -- TODO confirm against
 * <linux/prctl.h>. Do not reorder or resize members.
 */
struct prctl_mm_map {
	uint64_t start_code;
	uint64_t end_code;
	uint64_t start_data;
	uint64_t end_data;
	uint64_t start_brk;
	uint64_t brk;
	uint64_t start_stack;
	uint64_t arg_start;
	uint64_t arg_end;
	uint64_t env_start;
	uint64_t env_end;
	uint64_t *auxv;
	uint32_t auxv_size;
	uint32_t exe_fd;
};
#endif
74
4928c718
SG
75#ifndef O_PATH
76#define O_PATH 010000000
77#endif
78
79#ifndef O_NOFOLLOW
80#define O_NOFOLLOW 00400000
81#endif
82
e3642c43
DL
83lxc_log_define(lxc_utils, lxc);
84
4295c5de
SH
85/*
86 * if path is btrfs, tries to remove it and any subvolumes beneath it
87 */
88extern bool btrfs_try_remove_subvol(const char *path);
89
0cc417b2
SH
/*
 * Delete everything below @dirname, then @dirname itself.
 *
 * @pdev:    device id of the top-level directory; with @onedev set, entries
 *           on a different device are not descended into (only a btrfs
 *           subvolume removal is attempted on them).
 * @exclude: entry name that, at the top level (@level == 0) only, is kept
 *           when it is a non-empty directory.
 * @level:   recursion depth, 0 for the initial call.
 *
 * Keeps going after individual failures; returns -1 if anything failed,
 * 0 otherwise.
 */
static int _recursive_rmdir(char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	char pathname[MAXPATHLEN];
	struct dirent *entry;
	bool hadexclude = false;
	int failed = 0;
	int ret;
	DIR *dir;

	dir = opendir(dirname);
	if (!dir) {
		ERROR("%s: failed to open %s", __func__, dirname);
		return -1;
	}

	for (entry = readdir(dir); entry; entry = readdir(dir)) {
		const char *name = entry->d_name;
		struct stat mystat;
		int rc;

		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
			continue;

		rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, name);
		if (rc < 0 || rc >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			continue;
		}

		if (level == 0 && exclude && strcmp(name, exclude) == 0) {
			/* Only an empty excluded directory (or a plain file) goes away. */
			ret = rmdir(pathname);
			if (ret >= 0)
				continue;

			switch (errno) {
			case ENOTEMPTY:
				INFO("Not deleting snapshot %s", pathname);
				hadexclude = true;
				break;
			case ENOTDIR:
				ret = unlink(pathname);
				if (ret)
					INFO("%s: failed to remove %s", __func__, pathname);
				break;
			default:
				SYSERROR("%s: failed to rmdir %s", __func__, pathname);
				failed = 1;
				break;
			}
			continue;
		}

		ret = lstat(pathname, &mystat);
		if (ret) {
			ERROR("%s: failed to stat %s", __func__, pathname);
			failed = 1;
			continue;
		}

		if (onedev && mystat.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(pathname))
				INFO("Removed btrfs subvolume at %s\n", pathname);
			continue;
		}

		if (!S_ISDIR(mystat.st_mode)) {
			if (unlink(pathname) < 0) {
				SYSERROR("%s: failed to delete %s", __func__, pathname);
				failed = 1;
			}
			continue;
		}

		if (_recursive_rmdir(pathname, pdev, exclude, level + 1, onedev) < 0)
			failed = 1;
	}

	/* A kept @exclude entry legitimately leaves the directory non-empty. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !hadexclude) {
		ERROR("%s: failed to delete %s", __func__, dirname);
		failed = 1;
	}

	ret = closedir(dir);
	if (ret) {
		ERROR("%s: failed to close directory %s", __func__, dirname);
		failed = 1;
	}

	return failed ? -1 : 0;
}
182
0cc417b2
SH
/* we have two different magic values for overlayfs, yay */
#define OVERLAYFS_SUPER_MAGIC 0x794c764f
#define OVERLAY_SUPER_MAGIC 0x794c7630

/*
 * Tell whether @path sits on an overlayfs filesystem, judged by the
 * f_type reported by statfs(). In overlayfs, st_dev is unreliable, so
 * callers skip the one-device restriction there. Returns false when
 * statfs() fails.
 */
static bool is_native_overlayfs(const char *path)
{
	struct statfs fs;

	if (statfs(path, &fs) < 0)
		return false;

	return fs.f_type == OVERLAYFS_SUPER_MAGIC ||
	       fs.f_type == OVERLAY_SUPER_MAGIC;
}
201
/*
 * Recursively remove @path. Unless @path is on overlayfs, entries on a
 * different device than @path itself are not descended into. @exclude is
 * forwarded to _recursive_rmdir() as the top-level entry to keep.
 *
 * A @path that does not exist counts as success.
 * Returns 0 on success, -1 if there were any failures.
 */
extern int lxc_rmdir_onedev(char *path, const char *exclude)
{
	struct stat st;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &st) >= 0)
		return _recursive_rmdir(path, st.st_dev, exclude, 0, onedev);

	if (errno == ENOENT)
		return 0;

	ERROR("%s: failed to stat %s", __func__, path);
	return -1;
}
221
/*
 * Parse @arg as an unsigned 16-bit integer in the given @base and store it
 * in @val (borrowed from iproute2).
 *
 * Returns 0 on success. Returns -1, leaving @val untouched, when @arg is
 * NULL or empty, has trailing characters, exceeds 0xFFFF, or strtoul()
 * reports an error through errno.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || *arg == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);
	if (errno != 0)
		return -1;
	if (end == NULL || end == arg || *end != '\0')
		return -1;
	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;
	return 0;
}
240
/*
 * Create @dir and every missing parent directory, each with @mode.
 *
 * Each pass of the loop creates the prefix of the path that precedes the
 * current component; the final pass (no component left) creates the full
 * path. Directories that already exist are not an error.
 *
 * Returns 0 on success, -1 if a mkdir() fails for a reason other than
 * EEXIST or if memory for a path prefix cannot be allocated.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");
		makeme = strndup(orig, dir - orig);
		/* strndup() can fail; never dereference a NULL prefix. */
		if (!makeme)
			return -1;
		if (*makeme && mkdir(makeme, mode) && errno != EEXIST) {
			SYSERROR("failed to create directory '%s'", makeme);
			free(makeme);
			return -1;
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
2a59a681 263
44b9ae4b 264char *get_rundir()
9e60f51d 265{
97a696c6
SG
266 char *rundir;
267 const char *homedir;
9e60f51d 268
d6470e71 269 if (geteuid() == 0) {
c580b8d2 270 rundir = strdup(RUNTIME_PATH);
d6470e71
SG
271 return rundir;
272 }
97a696c6
SG
273
274 rundir = getenv("XDG_RUNTIME_DIR");
44b9ae4b
SG
275 if (rundir) {
276 rundir = strdup(rundir);
277 return rundir;
278 }
97a696c6 279
44b9ae4b
SG
280 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
281 homedir = getenv("HOME");
282 if (!homedir) {
283 ERROR("HOME isn't set in the environment.");
284 return NULL;
97a696c6
SG
285 }
286
44b9ae4b
SG
287 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
288 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
289
9e60f51d
DE
290 return rundir;
291}
292
9be53773
SH
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted
 * by a signal.
 *
 * Returns 0 only if the child exited normally with status 0; returns -1 if
 * waitpid() fails or the child ended any other way.
 */
int wait_for_pid(pid_t pid)
{
	int status, ret;

	for (;;) {
		ret = waitpid(pid, &status, 0);
		if (ret == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}
		if (ret == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
c797a220
CS
310
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted
 * by a signal.
 *
 * Returns the raw wait status (to be decoded with the W*() macros), or -1
 * if waitpid() fails.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status, ret;

	for (;;) {
		ret = waitpid(pid, &status, 0);
		if (ret == -1) {
			if (errno != EINTR)
				return -1;
			continue;
		}
		if (ret == pid)
			break;
	}

	return status;
}
92f023dc 326
/*
 * write() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final write() returned (short writes are not
 * completed here).
 */
ssize_t lxc_write_nointr(int fd, const void* buf, size_t count)
{
	ssize_t written;

	do {
		written = write(fd, buf, count);
	} while (written < 0 && errno == EINTR);

	return written;
}
336
/*
 * read() wrapper that restarts the call whenever it fails with EINTR.
 * Returns whatever the final read() returned (short reads are not
 * completed here).
 */
ssize_t lxc_read_nointr(int fd, void* buf, size_t count)
{
	ssize_t nread;

	do {
		nread = read(fd, buf, count);
	} while (nread < 0 && errno == EINTR);

	return nread;
}
346
/*
 * Read exactly @count bytes from @fd into @buf and, when @expected_buf is
 * non-NULL, require them to equal the first @count bytes of @expected_buf.
 *
 * Returns the read result unchanged when it is <= 0, -1 on a short read,
 * -1 with errno set to EINVAL on a content mismatch, and @count otherwise.
 */
ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf)
{
	ssize_t nread = lxc_read_nointr(fd, buf, count);

	if (nread <= 0)
		return nread;

	if ((size_t)nread != count)
		return -1;

	if (!expected_buf || memcmp(buf, expected_buf, count) == 0)
		return nread;

	errno = EINVAL;
	return -1;
}
3ce74686
SH
361
362#if HAVE_LIBGNUTLS
363#include <gnutls/gnutls.h>
364#include <gnutls/crypto.h>
41246cee
DE
365
/*
 * Marked as a constructor so that gnutls_global_init() is called once
 * before the hashing helper below can be used. The return value of
 * gnutls_global_init() is intentionally not inspected.
 */
__attribute__((constructor))
static void gnutls_lxc_init(void)
{
	(void)gnutls_global_init();
}
371
3ce74686
SH
372int sha1sum_file(char *fnam, unsigned char *digest)
373{
374 char *buf;
375 int ret;
376 FILE *f;
377 long flen;
378
379 if (!fnam)
380 return -1;
025ed0f3 381 f = fopen_cloexec(fnam, "r");
7be677a8 382 if (!f) {
3ce74686
SH
383 SYSERROR("Error opening template");
384 return -1;
385 }
386 if (fseek(f, 0, SEEK_END) < 0) {
387 SYSERROR("Error seeking to end of template");
dd1d77f9 388 fclose(f);
3ce74686
SH
389 return -1;
390 }
391 if ((flen = ftell(f)) < 0) {
392 SYSERROR("Error telling size of template");
dd1d77f9 393 fclose(f);
3ce74686
SH
394 return -1;
395 }
396 if (fseek(f, 0, SEEK_SET) < 0) {
397 SYSERROR("Error seeking to start of template");
dd1d77f9 398 fclose(f);
3ce74686
SH
399 return -1;
400 }
401 if ((buf = malloc(flen+1)) == NULL) {
402 SYSERROR("Out of memory");
dd1d77f9 403 fclose(f);
3ce74686
SH
404 return -1;
405 }
406 if (fread(buf, 1, flen, f) != flen) {
407 SYSERROR("Failure reading template");
408 free(buf);
dd1d77f9 409 fclose(f);
3ce74686
SH
410 return -1;
411 }
dd1d77f9 412 if (fclose(f) < 0) {
3ce74686
SH
413 SYSERROR("Failre closing template");
414 free(buf);
415 return -1;
416 }
417 buf[flen] = '\0';
418 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
419 free(buf);
420 return ret;
421}
422#endif
61a1d519
CS
423
/*
 * Turn a NULL-terminated variadic list of strings into a NULL-terminated
 * argv-style array.
 *
 * @ap:        argument list to consume (the caller still owns va_end()).
 * @skip:      number of leading slots to leave NULL for the caller to fill.
 * @do_strdup: when non-zero each argument is duplicated, otherwise the
 *             original pointers are stored.
 *
 * Returns the allocated array, or NULL on allocation failure; on failure
 * any strings duplicated so far are released as well.
 */
char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup)
{
	va_list ap2;
	size_t count = 1 + skip;
	char **result;

	/* first determine size of argument list, we don't want to reallocate
	 * constantly...
	 */
	va_copy(ap2, ap);
	while (va_arg(ap2, char *))
		count++;
	va_end(ap2);

	result = calloc(count, sizeof(char *));
	if (!result)
		return NULL;

	count = skip;
	for (;;) {
		char *arg = va_arg(ap, char *);

		if (!arg)
			break;
		if (do_strdup) {
			arg = strdup(arg);
			if (!arg)
				goto oom;
		}
		result[count++] = arg;
	}

	/* calloc has already set last element to NULL*/
	return result;

oom:
	/* Slots skip..count-1 hold our own copies; do not leak them. */
	while (count > skip)
		free(result[--count]);
	free(result);
	return NULL;
}
463
/*
 * Same as lxc_va_arg_list_to_argv() without duplicating the strings: the
 * returned array stores the caller's original pointers.
 */
const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip)
{
	char **argv = lxc_va_arg_list_to_argv(ap, skip, 0);

	return (const char **)argv;
}
db27c8d7 468
ebec9176
AM
469extern struct lxc_popen_FILE *lxc_popen(const char *command)
470{
471 struct lxc_popen_FILE *fp = NULL;
472 int parent_end = -1, child_end = -1;
473 int pipe_fds[2];
474 pid_t child_pid;
475
476 int r = pipe2(pipe_fds, O_CLOEXEC);
477
478 if (r < 0) {
479 ERROR("pipe2 failure");
480 return NULL;
481 }
482
483 parent_end = pipe_fds[0];
484 child_end = pipe_fds[1];
485
486 child_pid = fork();
487
488 if (child_pid == 0) {
489 /* child */
490 int child_std_end = STDOUT_FILENO;
491
492 if (child_end != child_std_end) {
493 /* dup2() doesn't dup close-on-exec flag */
494 dup2(child_end, child_std_end);
495
496 /* it's safe not to close child_end here
497 * as it's marked close-on-exec anyway
498 */
499 } else {
500 /*
501 * The descriptor is already the one we will use.
502 * But it must not be marked close-on-exec.
503 * Undo the effects.
504 */
57d2be54
SG
505 if (fcntl(child_end, F_SETFD, 0) != 0) {
506 SYSERROR("Failed to remove FD_CLOEXEC from fd.");
507 exit(127);
508 }
ebec9176
AM
509 }
510
511 /*
512 * Unblock signals.
513 * This is the main/only reason
514 * why we do our lousy popen() emulation.
515 */
516 {
517 sigset_t mask;
518 sigfillset(&mask);
519 sigprocmask(SIG_UNBLOCK, &mask, NULL);
520 }
521
522 execl("/bin/sh", "sh", "-c", command, (char *) NULL);
523 exit(127);
524 }
525
526 /* parent */
527
528 close(child_end);
529 child_end = -1;
530
531 if (child_pid < 0) {
532 ERROR("fork failure");
533 goto error;
534 }
535
536 fp = calloc(1, sizeof(*fp));
537 if (!fp) {
538 ERROR("failed to allocate memory");
539 goto error;
540 }
541
542 fp->f = fdopen(parent_end, "r");
543 if (!fp->f) {
544 ERROR("fdopen failure");
545 goto error;
546 }
547
548 fp->child_pid = child_pid;
549
550 return fp;
551
552error:
553
554 if (fp) {
555 if (fp->f) {
556 fclose(fp->f);
557 parent_end = -1; /* so we do not close it second time */
558 }
559
560 free(fp);
561 }
562
ebec9176
AM
563 if (parent_end != -1)
564 close(parent_end);
565
566 return NULL;
567}
568
ebec9176
AM
569extern int lxc_pclose(struct lxc_popen_FILE *fp)
570{
571 FILE *f = NULL;
572 pid_t child_pid = 0;
573 int wstatus = 0;
574 pid_t wait_pid;
575
576 if (fp) {
577 f = fp->f;
578 child_pid = fp->child_pid;
579 /* free memory (we still need to close file stream) */
580 free(fp);
581 fp = NULL;
582 }
583
584 if (!f || fclose(f)) {
585 ERROR("fclose failure");
586 return -1;
587 }
588
589 do {
590 wait_pid = waitpid(child_pid, &wstatus, 0);
591 } while (wait_pid == -1 && errno == EINTR);
592
593 if (wait_pid == -1) {
594 ERROR("waitpid failure");
595 return -1;
596 }
597
598 return wstatus;
599}
600
502657d5
CS
/*
 * Return a newly allocated copy of @haystack in which every
 * non-overlapping occurrence of @needle (scanning left to right) is
 * replaced by @replacement.
 *
 * An empty @needle matches nothing, so the result is a plain copy of
 * @haystack. Returns NULL on allocation failure. The caller frees the
 * result.
 */
char *lxc_string_replace(const char *needle, const char *replacement, const char *haystack)
{
	size_t needle_len = strlen(needle);
	size_t replacement_len = strlen(replacement);
	size_t haystack_len = strlen(haystack);
	size_t matches = 0;
	size_t result_len, tail_len;
	const char *cur, *hit;
	char *result, *out;

	/* First pass: count matches so the result can be sized exactly. */
	if (needle_len > 0) {
		for (cur = haystack; (hit = strstr(cur, needle)); cur = hit + needle_len)
			matches++;
	}

	result_len = haystack_len - matches * needle_len + matches * replacement_len;
	result = malloc(result_len + 1);
	if (!result)
		return NULL;

	/* Second pass: copy the gaps between matches and the replacements. */
	out = result;
	cur = haystack;
	while (matches > 0 && (hit = strstr(cur, needle))) {
		size_t gap = (size_t)(hit - cur);

		memcpy(out, cur, gap);
		out += gap;
		memcpy(out, replacement, replacement_len);
		out += replacement_len;
		cur = hit + needle_len;
	}

	/* Remainder after the last match, including the terminating NUL. */
	tail_len = haystack_len - (size_t)(cur - haystack);
	memcpy(out, cur, tail_len + 1);

	return result;
}
655
/*
 * Return true when @needle equals one of the strings in the
 * NULL-terminated array @haystack. A NULL @haystack contains nothing.
 */
bool lxc_string_in_array(const char *needle, const char **haystack)
{
	size_t i;

	if (!haystack)
		return false;

	for (i = 0; haystack[i]; i++) {
		if (strcmp(needle, haystack[i]) == 0)
			return true;
	}

	return false;
}
663
/*
 * Concatenate the strings of the NULL-terminated array @parts, inserting
 * @sep between consecutive elements and, when @use_as_prefix is true,
 * also in front of the first one.
 *
 * Returns a newly allocated string (the caller frees it), or NULL on
 * allocation failure.
 */
char *lxc_string_join(const char *sep, const char **parts, bool use_as_prefix)
{
	size_t sep_len = strlen(sep);
	size_t result_len = use_as_prefix ? sep_len : 0;
	const char **p;
	char *result, *out;

	/* calculate new string length */
	for (p = parts; *p; p++) {
		if (p != parts)
			result_len += sep_len;
		result_len += strlen(*p);
	}

	result = calloc(result_len + 1, 1);
	if (!result)
		return NULL;

	/* Append through a moving cursor so each byte is copied once. */
	out = result;
	if (use_as_prefix) {
		memcpy(out, sep, sep_len);
		out += sep_len;
	}
	for (p = parts; *p; p++) {
		size_t part_len = strlen(*p);

		if (p != parts) {
			memcpy(out, sep, sep_len);
			out += sep_len;
		}
		memcpy(out, *p, part_len);
		out += part_len;
	}

	/* calloc() already provided the terminating NUL. */
	return result;
}
689
/*
 * Split @path on '/' and resolve "." and ".." components lexically.
 *
 * "." entries are dropped, ".." removes the preceding component, and a
 * ".." with nothing before it is dropped. Returns the NULL-terminated
 * array of remaining components (release it with lxc_free_array()), or
 * NULL if the split fails.
 */
char **lxc_normalize_path(const char *path)
{
	char **components;
	size_t count = 0;
	size_t pos = 0;

	components = lxc_string_split(path, '/');
	if (!components)
		return NULL;

	while (components[count])
		count++;

	/*
	 * The array holds count + 1 slots (terminator included), so each
	 * memmove below also shifts the trailing NULL into place.
	 */
	while (pos < count) {
		bool is_dot = strcmp(components[pos], ".") == 0;
		bool is_dotdot = strcmp(components[pos], "..") == 0;

		if (!is_dot && !is_dotdot) {
			pos++;
			continue;
		}

		if (is_dot || pos == 0) {
			/* eat this element */
			free(components[pos]);
			memmove(&components[pos], &components[pos + 1],
				sizeof(char *) * (count - pos));
			count--;
			continue;
		}

		/* eat this and the previous element */
		free(components[pos - 1]);
		free(components[pos]);
		memmove(&components[pos - 1], &components[pos + 1],
			sizeof(char *) * (count - pos));
		count -= 2;
		pos--;
	}

	return components;
}
724
/*
 * Replace *@path with a normalized copy: "." and ".." are resolved and
 * repeated or trailing slashes collapse, with a leading '/' preserved.
 *
 * On success the old string is freed, *@path points to the new one and
 * true is returned. An empty path is left untouched and reported as
 * success. On failure *@path is unchanged and false is returned.
 */
bool lxc_deslashify(char **path)
{
	bool ret = false;
	char *p;
	char **parts = NULL;
	size_t n, len;

	parts = lxc_normalize_path(*path);
	if (!parts)
		return false;

	/* We'll end up here if path == "///" or path == "". */
	if (!*parts) {
		len = strlen(*path);
		if (!len) {
			ret = true;
			goto out;
		}

		/*
		 * strspn() counts the leading slashes; when that covers the
		 * whole string the path consists of slashes only.
		 */
		n = strspn(*path, "/");
		if (n == len) {
			p = strdup("/");
			if (!p)
				goto out;
			free(*path);
			*path = p;
			ret = true;
			goto out;
		}
	}

	p = lxc_string_join("/", (const char **)parts, **path == '/');
	if (!p)
		goto out;

	free(*path);
	*path = p;
	ret = true;

out:
	lxc_free_array((void **)parts, free);
	return ret;
}
767
24b51482
CS
/*
 * Return a newly allocated "@first/@second". A '/' is inserted between the
 * two only when @second does not already start with one. Returns NULL on
 * allocation failure. The caller frees the result.
 */
char *lxc_append_paths(const char *first, const char *second)
{
	bool need_sep = second[0] != '/';
	size_t len = strlen(first) + strlen(second) + 1 + (need_sep ? 1 : 0);
	char *result;

	result = calloc(1, len);
	if (!result)
		return NULL;

	if (need_sep)
		snprintf(result, len, "%s/%s", first, second);
	else
		snprintf(result, len, "%s%s", first, second);

	return result;
}
786
502657d5
CS
/*
 * Return true when @needle equals one of the non-empty tokens obtained by
 * splitting @haystack at every @_sep character. Empty tokens are skipped,
 * so an empty @needle never matches. Returns false when either string is
 * NULL.
 *
 * The list is scanned in place, without copying it.
 */
bool lxc_string_in_list(const char *needle, const char *haystack, char _sep)
{
	char sep[2] = { _sep, '\0' };
	size_t needle_len;
	const char *cur;

	if (!haystack || !needle)
		return false;

	needle_len = strlen(needle);
	cur = haystack;
	for (;;) {
		size_t token_len;

		/* Skip any run of separators in front of the next token. */
		cur += strspn(cur, sep);
		if (*cur == '\0')
			break;

		token_len = strcspn(cur, sep);
		if (token_len == needle_len && memcmp(cur, needle, needle_len) == 0)
			return true;

		cur += token_len;
	}

	return false;
}
804
/*
 * Split @string at every @_sep character into a NULL-terminated array of
 * newly allocated strings. Empty tokens are dropped (strtok_r semantics).
 *
 * A NULL @string yields an empty array. Returns NULL on allocation
 * failure with errno preserved. Release the result with
 * lxc_free_array(result, free).
 */
char **lxc_string_split(const char *string, char _sep)
{
	char *token, *copy, *str, *saveptr = NULL;
	char sep[2] = {_sep, '\0'};
	char **tmp = NULL, **result = NULL;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so work on a heap copy. */
	copy = strdup(string);
	if (!copy)
		return NULL;

	for (str = copy; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}

	/* if we allocated too much, reduce it */
	tmp = realloc(result, (result_count + 1) * sizeof(char *));
	if (!tmp)
		goto error_out;
	result = tmp;
	/* Make sure we don't return uninitialized memory. */
	if (result_count == 0)
		*result = NULL;

	free(copy);
	return result;

error_out:
	saved_errno = errno;
	free(copy);
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
844
/*
 * Like lxc_string_split(), but additionally strips leading and trailing
 * spaces and tabs from every token. A token consisting only of blanks is
 * kept as an empty string.
 *
 * A NULL @string yields an empty array. Returns NULL on allocation
 * failure with errno preserved. Release the result with
 * lxc_free_array(result, free).
 */
char **lxc_string_split_and_trim(const char *string, char _sep)
{
	char *token, *copy, *str, *saveptr = NULL;
	char sep[2] = { _sep, '\0' };
	char **tmp = NULL, **result = NULL;
	size_t result_capacity = 0;
	size_t result_count = 0;
	int r, saved_errno;
	size_t i = 0;

	if (!string)
		return calloc(1, sizeof(char *));

	/* strtok_r() modifies its input, so work on a heap copy. */
	copy = strdup(string);
	if (!copy)
		return NULL;

	for (str = copy; (token = strtok_r(str, sep, &saveptr)); str = NULL) {
		while (token[0] == ' ' || token[0] == '\t')
			token++;
		i = strlen(token);
		while (i > 0 && (token[i - 1] == ' ' || token[i - 1] == '\t')) {
			token[i - 1] = '\0';
			i--;
		}
		r = lxc_grow_array((void ***)&result, &result_capacity, result_count + 1, 16);
		if (r < 0)
			goto error_out;
		result[result_count] = strdup(token);
		if (!result[result_count])
			goto error_out;
		result_count++;
	}

	/* if we allocated too much, reduce it */
	tmp = realloc(result, (result_count + 1) * sizeof(char *));
	if (!tmp)
		goto error_out;
	result = tmp;
	/*
	 * With no tokens, result was NULL and realloc() handed back
	 * uninitialized memory: terminate the array explicitly.
	 */
	if (result_count == 0)
		*result = NULL;

	free(copy);
	return result;

error_out:
	saved_errno = errno;
	free(copy);
	lxc_free_array((void **)result, free);
	errno = saved_errno;
	return NULL;
}
885
886void lxc_free_array(void **array, lxc_free_fn element_free_fn)
887{
888 void **p;
889 for (p = array; p && *p; p++)
890 element_free_fn(*p);
891 free((void*)array);
892}
893
/*
 * Make sure the pointer array *@array can hold @new_size elements plus a
 * terminating NULL, growing it in steps of @capacity_increment slots.
 *
 * @array:    in/out array pointer; may point to NULL initially.
 * @capacity: in/out number of allocated slots.
 *
 * Newly added slots are zeroed. Returns 0 on success. Returns -1 and
 * leaves *@array and *@capacity untouched if reallocation fails, or if
 * growth is required but @capacity_increment is 0 (errno is then EINVAL).
 */
int lxc_grow_array(void ***array, size_t* capacity, size_t new_size, size_t capacity_increment)
{
	size_t new_capacity;
	void **new_array;

	/* first time around, catch some trivial mistakes of the user
	 * only initializing one of these */
	if (!*array || !*capacity) {
		*array = NULL;
		*capacity = 0;
	}

	new_capacity = *capacity;
	if (new_size + 1 > new_capacity) {
		/* A zero step could never reach the target size. */
		if (capacity_increment == 0) {
			errno = EINVAL;
			return -1;
		}
		while (new_size + 1 > new_capacity)
			new_capacity += capacity_increment;
	}

	/* array has sufficient elements */
	if (new_capacity == *capacity)
		return 0;

	/* we have to reallocate */
	new_array = realloc(*array, new_capacity * sizeof(void *));
	if (!new_array)
		return -1;

	memset(&new_array[*capacity], 0, (new_capacity - (*capacity)) * sizeof(void *));
	*array = new_array;
	*capacity = new_capacity;

	return 0;
}
922
/*
 * Count the elements of a NULL-terminated pointer array (terminator not
 * included). A NULL @array has length 0.
 */
size_t lxc_array_len(void **array)
{
	size_t len = 0;

	if (!array)
		return 0;

	while (array[len])
		len++;

	return len;
}
933
0e95426b
CS
934int lxc_write_to_file(const char *filename, const void* buf, size_t count, bool add_newline)
935{
936 int fd, saved_errno;
937 ssize_t ret;
938
939 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT | O_CLOEXEC, 0666);
940 if (fd < 0)
941 return -1;
942 ret = lxc_write_nointr(fd, buf, count);
943 if (ret < 0)
799f29ab 944 goto out_error;
0e95426b
CS
945 if ((size_t)ret != count)
946 goto out_error;
947 if (add_newline) {
948 ret = lxc_write_nointr(fd, "\n", 1);
949 if (ret != 1)
950 goto out_error;
951 }
952 close(fd);
953 return 0;
954
955out_error:
956 saved_errno = errno;
957 close(fd);
958 errno = saved_errno;
959 return -1;
960}
961
962int lxc_read_from_file(const char *filename, void* buf, size_t count)
963{
964 int fd = -1, saved_errno;
965 ssize_t ret;
966
967 fd = open(filename, O_RDONLY | O_CLOEXEC);
968 if (fd < 0)
969 return -1;
970
971 if (!buf || !count) {
972 char buf2[100];
973 size_t count2 = 0;
974 while ((ret = read(fd, buf2, 100)) > 0)
975 count2 += ret;
976 if (ret >= 0)
977 ret = count2;
978 } else {
979 memset(buf, 0, count);
980 ret = read(fd, buf, count);
981 }
982
983 if (ret < 0)
984 ERROR("read %s: %s", filename, strerror(errno));
985
986 saved_errno = errno;
987 close(fd);
988 errno = saved_errno;
989 return ret;
990}
799f29ab
ÇO
991
/*
 * Grow @array, which holds @count elements, by one slot and store NULL in
 * it. With @count == 0 the array is returned unchanged.
 *
 * If the reallocation fails, every element and the array itself are
 * freed and NULL is returned.
 */
void **lxc_append_null_to_array(void **array, size_t count)
{
	void **grown;
	size_t i;

	if (count == 0)
		return array;

	grown = realloc(array, (count + 1) * sizeof(*array));
	if (grown) {
		grown[count] = NULL;
		return grown;
	}

	for (i = 0; i < count; i++)
		free(array[i]);
	free(array);

	return NULL;
}
508c263e
SH
1011
/*
 * Produce a seed for rand(): one unsigned int read from /dev/urandom,
 * falling back to time + pid when the device cannot be opened or read.
 * When @srand_it is true the seed is also passed to srand().
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom;

	urandom = fopen("/dev/urandom", "r");
	if (urandom) {
		int ret = fread(&seed, sizeof(seed), 1, urandom);

		if (ret != 1)
			DEBUG("unable to fread /dev/urandom, %s, fallback to time+pid rand seed", strerror(errno));
		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
5d897655
SH
1033
/*
 * Translate uid @orig through /proc/self/uid_map.
 *
 * Each line is parsed as "nsid hostid range"; when @orig falls inside
 * [hostid, hostid + range) the result is nsid + (orig - hostid).
 * Returns 0 when the map cannot be opened or no line covers @orig.
 */
uid_t get_ns_uid(uid_t orig)
{
	char *line = NULL;
	size_t sz = 0;
	uid_t nsid, hostid, range;
	uid_t mapped = 0;
	FILE *f;

	f = fopen("/proc/self/uid_map", "r");
	if (!f)
		return 0;

	while (getline(&line, &sz, f) != -1) {
		if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
			continue;
		if (orig < hostid || hostid + range <= orig)
			continue;

		mapped = nsid + (orig - hostid);
		break;
	}

	fclose(f);
	free(line);
	return mapped;
}
c476bdce
SH
1058
/*
 * Return true when @path can be stat()ed and is a directory (symlinks are
 * followed). Any stat() failure, whatever the reason, counts as "no".
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) < 0)
		return false;

	return S_ISDIR(sb.st_mode);
}
93c379f0
ÇO
1070
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 *
 * Folds @len bytes of @buf into the running hash @hval: each byte is
 * xor'ed into the low bits, then the hash is multiplied by the 64 bit FNV
 * prime (mod 2^64). Returns the updated hash; with @len == 0, @hval is
 * returned unchanged.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	/* 2^40 + 2^8 + 0xb3: the same factor the shift-add form builds. */
	const uint64_t fnv_prime = 0x100000001b3ULL;
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		hval ^= (uint64_t)bytes[i];
		hval *= fnv_prime;
	}

	return hval;
}
2c6f3fc9
SH
1094
1095/*
1096 * Detect whether / is mounted MS_SHARED. The only way I know of to
1097 * check that is through /proc/self/mountinfo.
1098 * I'm only checking for /. If the container rootfs or mount location
1099 * is MS_SHARED, but not '/', then you're out of luck - figuring that
1100 * out would be too much work to be worth it.
1101 */
2c6f3fc9
SH
1102int detect_shared_rootfs(void)
1103{
eab15c1e 1104 char buf[LXC_LINELEN], *p;
2c6f3fc9
SH
1105 FILE *f;
1106 int i;
1107 char *p2;
1108
1109 f = fopen("/proc/self/mountinfo", "r");
1110 if (!f)
1111 return 0;
eab15c1e
CB
1112 while (fgets(buf, LXC_LINELEN, f)) {
1113 for (p = buf, i = 0; p && i < 4; i++)
1114 p = strchr(p + 1, ' ');
2c6f3fc9
SH
1115 if (!p)
1116 continue;
eab15c1e 1117 p2 = strchr(p + 1, ' ');
2c6f3fc9
SH
1118 if (!p2)
1119 continue;
1120 *p2 = '\0';
eab15c1e 1121 if (strcmp(p + 1, "/") == 0) {
2c6f3fc9 1122 // this is '/'. is it shared?
eab15c1e 1123 p = strchr(p2 + 1, ' ');
2c6f3fc9
SH
1124 if (p && strstr(p, "shared:")) {
1125 fclose(f);
1126 return 1;
1127 }
1128 }
1129 }
1130 fclose(f);
1131 return 0;
1132}
0e6e3a41 1133
51d0854c
DY
/*
 * Move the calling process into namespace @ns (the entry name under
 * /proc/<pid>/ns/) of process @pid, using setns().
 *
 * Returns true on success, false if the path does not fit, cannot be
 * opened, or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns) {
	char nspath[MAXPATHLEN];
	int fd, ret;
	bool switched;

	/* Switch to new ns */
	ret = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (ret < 0 || ret >= MAXPATHLEN)
		return false;

	fd = open(nspath, O_RDONLY);
	if (fd < 0) {
		SYSERROR("failed to open %s", nspath);
		return false;
	}

	switched = setns(fd, 0) == 0;
	if (!switched)
		SYSERROR("failed to set process %d to %s of %d.", pid, ns, fd);

	close(fd);
	return switched;
}
1158
b7f954bb
SH
/*
 * looking at fs/proc_namespace.c, it appears we can
 * actually expect the rootfs entry to very specifically contain
 * " - rootfs rootfs "
 * IIUC, so long as we've chrooted so that rootfs is not our root,
 * the rootfs entry should always be skipped in mountinfo contents.
 *
 * Returns true when /proc/self/mountinfo has a line whose fifth field is
 * "/" and whose first '-' after the following field starts
 * "- rootfs rootfs "; false otherwise (including when mountinfo cannot be
 * opened).
 */
bool detect_ramfs_rootfs(void)
{
	char *field, *field_end, *dash;
	char *line = NULL;
	size_t len = 0;
	bool found = false;
	int i;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		return false;

	while (getline(&line, &len, f) != -1) {
		/* Step over four spaces: field then points just before field five. */
		field = line;
		for (i = 0; field && i < 4; i++)
			field = strchr(field + 1, ' ');
		if (!field)
			continue;

		field_end = strchr(field + 1, ' ');
		if (!field_end)
			continue;
		*field_end = '\0';

		if (strcmp(field + 1, "/") != 0)
			continue;

		/* this is '/'. is it the ramfs? */
		dash = strchr(field_end + 1, '-');
		if (dash && strncmp(dash, "- rootfs rootfs ", 16) == 0) {
			found = true;
			break;
		}
	}

	free(line);
	fclose(f);
	return found;
}
1201
/*
 * Search the directories listed in $PATH for an executable named @cmd.
 * With a non-NULL @rootfs, each candidate is checked as
 * "<rootfs>/<dir>/<cmd>" instead of "<dir>/<cmd>".
 *
 * Returns a newly allocated copy of the first candidate that passes
 * access(X_OK), or NULL when PATH is unset, nothing matches, or memory
 * runs out. Candidates that do not fit in MAXPATHLEN are skipped.
 */
char *on_path(char *cmd, const char *rootfs) {
	char cmdpath[MAXPATHLEN];
	char *path, *entry;
	char *saveptr = NULL;
	char *found = NULL;
	int ret;

	path = getenv("PATH");
	if (!path)
		return NULL;

	/* strtok_r() writes into its input; never touch the environment. */
	path = strdup(path);
	if (!path)
		return NULL;

	for (entry = strtok_r(path, ":", &saveptr); entry;
	     entry = strtok_r(NULL, ":", &saveptr)) {
		if (rootfs)
			ret = snprintf(cmdpath, MAXPATHLEN, "%s/%s/%s", rootfs, entry, cmd);
		else
			ret = snprintf(cmdpath, MAXPATHLEN, "%s/%s", entry, cmd);

		if (ret < 0 || ret >= MAXPATHLEN)
			continue;

		if (access(cmdpath, X_OK) != 0)
			continue;

		found = strdup(cmdpath);
		break;
	}

	free(path);
	return found;
}
76a26f55
SH
1239
/*
 * Return true when stat() succeeds on @f, i.e. the path exists (after
 * following symlinks) and is reachable; false on any stat() failure.
 */
bool file_exists(const char *f)
{
	struct stat sb;

	if (stat(f, &sb) != 0)
		return false;

	return true;
}
9d9c111c 1246
12983ba4
SH
/*
 * Report whether cgroup namespaces are available, judged by the presence
 * of /proc/self/ns/cgroup.
 */
bool cgns_supported(void)
{
	static const char cgroup_ns_path[] = "/proc/self/ns/cgroup";

	return file_exists(cgroup_ns_path);
}
1251
9d9c111c
SH
1252/* historically lxc-init has been under /usr/lib/lxc and under
1253 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
1254 */
1255char *choose_init(const char *rootfs)
1256{
1257 char *retv = NULL;
370ec268
SF
1258 const char *empty = "",
1259 *tmp;
9d9c111c
SH
1260 int ret, env_set = 0;
1261 struct stat mystat;
1262
1263 if (!getenv("PATH")) {
1264 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
1265 SYSERROR("Failed to setenv");
1266 env_set = 1;
1267 }
1268
1269 retv = on_path("init.lxc", rootfs);
1270
1271 if (env_set) {
1272 if (unsetenv("PATH"))
1273 SYSERROR("Failed to unsetenv");
1274 }
1275
1276 if (retv)
1277 return retv;
1278
1279 retv = malloc(PATH_MAX);
1280 if (!retv)
1281 return NULL;
1282
1283 if (rootfs)
370ec268 1284 tmp = rootfs;
9d9c111c 1285 else
370ec268
SF
1286 tmp = empty;
1287
1288 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
9d9c111c
SH
1289 if (ret < 0 || ret >= PATH_MAX) {
1290 ERROR("pathname too long");
1291 goto out1;
1292 }
1293
1294 ret = stat(retv, &mystat);
1295 if (ret == 0)
1296 return retv;
1297
370ec268 1298 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
9d9c111c
SH
1299 if (ret < 0 || ret >= PATH_MAX) {
1300 ERROR("pathname too long");
1301 goto out1;
1302 }
1303
1304 ret = stat(retv, &mystat);
1305 if (ret == 0)
1306 return retv;
1307
370ec268 1308 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
9d9c111c
SH
1309 if (ret < 0 || ret >= PATH_MAX) {
1310 ERROR("pathname too long");
1311 goto out1;
1312 }
1313 ret = stat(retv, &mystat);
1314 if (ret == 0)
1315 return retv;
1316
370ec268 1317 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
9d9c111c
SH
1318 if (ret < 0 || ret >= PATH_MAX) {
1319 ERROR("pathname too long");
1320 goto out1;
1321 }
1322 ret = stat(retv, &mystat);
1323 if (ret == 0)
1324 return retv;
1325
1326 /*
1327 * Last resort, look for the statically compiled init.lxc which we
1328 * hopefully bind-mounted in.
1329 * If we are called during container setup, and we get to this point,
1330 * then the init.lxc.static from the host will need to be bind-mounted
1331 * in. So we return NULL here to indicate that.
1332 */
1333 if (rootfs)
1334 goto out1;
1335
1336 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
1337 if (ret < 0 || ret >= PATH_MAX) {
1338 WARN("Nonsense - name /lxc.init.static too long");
1339 goto out1;
1340 }
1341 ret = stat(retv, &mystat);
1342 if (ret == 0)
1343 return retv;
1344
1345out1:
1346 free(retv);
1347 return NULL;
1348}
735f2c6e
TA
1349
/*
 * Replace the contents of @file with the string @content.
 *
 * Returns 0 when the whole string was written and the stream closed
 * cleanly, -1 if the file could not be opened or any write failed.
 */
int print_to_file(const char *file, const char *content)
{
	FILE *f;
	int ret = 0;

	f = fopen(file, "w");
	if (!f)
		return -1;

	if (fputs(content, f) == EOF)
		ret = -1;

	/*
	 * The stream is buffered, so short content may only reach the file
	 * when the stream is closed; a failing fclose() therefore means the
	 * data was not written.
	 */
	if (fclose(f) != 0)
		ret = -1;

	return ret;
}
e1daebd9
SH
1363
/* Return 1 if stat() succeeds on @path and reports a directory, else 0. */
int is_dir(const char *path)
{
	struct stat sb;

	if (stat(path, &sb) != 0)
		return 0;

	return S_ISDIR(sb.st_mode) ? 1 : 0;
}
6010a416
SG
1372
1373/*
1374 * Given the '-t' template option to lxc-create, figure out what to
1375 * do. If the template is a full executable path, use that. If it
1376 * is something like 'sshd', then return $templatepath/lxc-sshd.
1377 * On success return the template, on error return NULL.
1378 */
1379char *get_template_path(const char *t)
1380{
1381 int ret, len;
1382 char *tpath;
1383
1384 if (t[0] == '/' && access(t, X_OK) == 0) {
1385 tpath = strdup(t);
1386 return tpath;
1387 }
1388
1389 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
1390 tpath = malloc(len);
1391 if (!tpath)
1392 return NULL;
1393 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
1394 if (ret < 0 || ret >= len) {
1395 free(tpath);
1396 return NULL;
1397 }
1398 if (access(tpath, X_OK) < 0) {
1399 SYSERROR("bad template: %s", t);
1400 free(tpath);
1401 return NULL;
1402 }
1403
1404 return tpath;
1405}
0a4be28d
TA
1406
/*
 * Advance @pos over @count further space separators of a /proc stat line.
 * Returns the position reached, or NULL if the line ran out of fields.
 */
static char *proc_stat_skip(char *pos, int count)
{
	while (pos && count-- > 0)
		pos = strchr(pos + 1, ' ');

	return pos;
}

/*
 * Sets the process title to the specified title. Note:
 *   1. this function requires root to succeed
 *   2. it clears /proc/self/environ
 *   3. it may not succeed (e.g. if title is longer than /proc/self/environ +
 *      the original title)
 *
 * Returns 0 on success, -1 if /proc/self/stat could not be read or parsed
 * or memory ran out, and otherwise the result of the prctl() call.
 */
int setproctitle(char *title)
{
	static char *proctitle = NULL;
	char buf[2048], *tmp;
	FILE *f;
	int i, len, ret = 0;

	/* We don't really need to know all of this stuff, but unfortunately
	 * PR_SET_MM_MAP requires us to set it all at once, so we have to
	 * figure it out anyway.
	 */
	unsigned long start_data, end_data, start_brk, start_code, end_code,
			start_stack, arg_start, arg_end, env_start, env_end,
			brk_val;
	struct prctl_mm_map prctl_map;

	f = fopen_cloexec("/proc/self/stat", "r");
	if (!f)
		return -1;

	tmp = fgets(buf, sizeof(buf), f);
	fclose(f);
	if (!tmp)
		return -1;

	/* Skip the first 25 fields, column 26-28 are start_code, end_code,
	 * and start_stack */
	tmp = proc_stat_skip(strchr(buf, ' '), 24);
	if (!tmp)
		return -1;

	i = sscanf(tmp, "%lu %lu %lu", &start_code, &end_code, &start_stack);
	if (i != 3)
		return -1;

	/* Skip the next 19 fields, column 45-51 are start_data to arg_end */
	tmp = proc_stat_skip(tmp, 19);
	if (!tmp)
		return -1;

	i = sscanf(tmp, "%lu %lu %lu %lu %lu %lu %lu",
		   &start_data, &end_data, &start_brk,
		   &arg_start, &arg_end, &env_start, &env_end);
	if (i != 7)
		return -1;

	/* Include the null byte here, because in the calculations below we
	 * want to have room for it. */
	len = strlen(title) + 1;

	/* If we don't have enough room by just overwriting the old proctitle,
	 * let's allocate a new one.
	 */
	if (len > arg_end - arg_start) {
		void *m = realloc(proctitle, len);

		if (!m)
			return -1;

		proctitle = m;
		arg_start = (unsigned long) proctitle;
	}

	arg_end = arg_start + len;

	brk_val = syscall(__NR_brk, 0);

	/* Start from an all-zero map and fill in every field we know. */
	memset(&prctl_map, 0, sizeof(prctl_map));
	prctl_map.start_code = start_code;
	prctl_map.end_code = end_code;
	prctl_map.start_stack = start_stack;
	prctl_map.start_data = start_data;
	prctl_map.end_data = end_data;
	prctl_map.start_brk = start_brk;
	prctl_map.brk = brk_val;
	prctl_map.arg_start = arg_start;
	prctl_map.arg_end = arg_end;
	prctl_map.env_start = env_start;
	prctl_map.env_end = env_end;
	prctl_map.auxv = NULL;
	prctl_map.auxv_size = 0;
	prctl_map.exe_fd = -1;

	ret = prctl(PR_SET_MM, PR_SET_MM_MAP, (long) &prctl_map, sizeof(prctl_map), 0);
	if (ret != 0) {
		INFO("setting cmdline failed - %s", strerror(errno));
		return ret;
	}

	/* The kernel accepted the new argument area; put the title there. */
	strcpy((char*)arg_start, title);
	return ret;
}
ced03a01 1523
592fd47a
SH
/*
 * @path: a pathname in which every '/' has been replaced with '\0'.
 * @offsetp: pointer to an int naming the offset of the segment seen last.
 *           Updated on return to the offset of the next segment.
 * @fulllen: length of the original, untokenized path.
 *
 * Returns a pointer to the next path segment, or NULL if there is none.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int pos = *offsetp;

	if (pos >= fulllen)
		return NULL;

	/* Walk to the end of the current segment ... */
	for (; pos < fulllen && path[pos] != '\0'; pos++)
		;

	/* ... and then over the separators that follow it. */
	for (; pos < fulllen && path[pos] == '\0'; pos++)
		;

	*offsetp = pos;
	if (pos >= fulllen)
		return NULL;

	return &path[pos];
}
1546
/*
 * Check that @subdir is @dir itself or lies below it. @len is the length
 * of @dir (passed in so it need not be recomputed) and must be non-zero.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t sublen = strlen(subdir);

	/* @subdir has to start with all of @dir. */
	if (sublen < len || strncmp(subdir, dir, len) != 0)
		return false;

	/* A prefix ending in '/' already sits on a component boundary. */
	if (dir[len - 1] == '/')
		return true;

	/* Otherwise the match must end exactly at a component boundary. */
	return sublen == len || subdir[len] == '/';
}
1565
/*
 * Check whether the open @fd refers to a symlink.
 * Returns -ELOOP if it does, -ENOENT if fstat() failed and 0 if the fd is
 * fine.
 */
static int check_symlink(int fd)
{
	struct stat st;

	if (fstat(fd, &st) < 0)
		return -ENOENT;

	return S_ISLNK(st.st_mode) ? -ELOOP : 0;
}
1580
/*
 * Open the file or directory @nextpath relative to @dirfd, refusing to
 * follow a symlink.
 *
 * Returns the new fd, or a negative value on failure (-1 with errno set by
 * openat(), or the negative result of check_symlink()).
 *
 * CAVEAT: This function must not be used for other purposes than container
 *         setup before executing the container's init
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int fd, status;

	fd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
	/* Either it opened (so it was no symlink) or it is a symlink. */
	if (fd >= 0 || errno == ELOOP)
		return fd;

	if (errno != EPERM && errno != EACCES)
		return fd;

	/* We lack permission to read it, so try opening with O_PATH. */
	fd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (fd < 0)
		return fd;

	/* O_PATH will return an fd for symlinks. We know nextpath wasn't a
	 * symlink at the last openat, so if the fd is now a link then
	 * something fishy is going on.
	 */
	status = check_symlink(fd);
	if (status < 0) {
		close(fd);
		return status;
	}

	return fd;
}
1615
/*
 * Open a path intended for mounting, walking it one component at a time so
 * that no component after @prefix_skip is a symbolic link.
 *
 * CAVEAT: This function must not be used for other purposes than container
 *         setup before executing the container's init
 *
 * @target: path to be opened
 * @prefix_skip: a leading part of @target in which symbolic links are
 *               ignored. This would be the container's rootfs. NULL or ""
 *               means "/".
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int offset = 0, dirfd, fulllen;
	char *segs, *p;

	fulllen = strlen(target);

	/* make sure prefix-skip makes sense */
	if (prefix_skip && prefix_skip[0] != '\0') {
		offset = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, offset)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
				target, prefix_skip);
			return -EINVAL;
		}
		/*
		 * get_nextpath() expects the offset to be on a (turned into
		 * \0) / or before it, so step back by one to make sure that
		 * happens. The prefix is known to be non-empty here.
		 */
		offset--;
	} else {
		prefix_skip = "/";
	}

	/* Make a copy of target which we can hack up, and tokenize it */
	segs = strdup(target);
	if (!segs) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}
	for (p = segs; p < segs + fulllen; p++) {
		if (*p == '/')
			*p = '\0';
	}

	dirfd = open(prefix_skip, O_RDONLY);
	while (dirfd >= 0) {
		int nextfd, saved_errno;
		char *seg;

		/* No segment left: dirfd now refers to the full target. */
		seg = get_nextpath(segs, &offset, fulllen);
		if (!seg)
			break;

		nextfd = open_if_safe(dirfd, seg);
		saved_errno = errno;
		close(dirfd);
		dirfd = nextfd;
		if (nextfd < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", seg, target);
		}
	}

	free(segs);
	return dirfd;
}
1691
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * The destination (and, for bind mounts with a relative @src, the source)
 * is opened component by component without following symlinks, and the
 * mount is then performed through the matching /proc/self/fd/<fd> path.
 *
 * Returns 0 on success and a negative value on failure.
 *
 * CAVEAT: This function must not be used for other purposes than container
 *         setup before executing the container's init
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
		unsigned long flags, const void *data, const char *rootfs)
{
	int srcfd = -1, destfd, ret, saved_errno;
	char srcbuf[50], destbuf[50]; // only needs enough for /proc/self/fd/<fd>
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if (flags & MS_BIND && src && src[0] != '/') {
		INFO("this is a relative bind mount");
		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		/* A result equal to the buffer size already means truncation. */
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Failed to create path for mount source fd");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			/* Keep the errno of the failed open for the caller. */
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}
		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);
		close(destfd);
		ERROR("Failed to create path for mount target fd");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);
	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		SYSERROR("Failed to mount %s onto %s", src, dest);
		return ret;
	}

	return 0;
}
1757
ced03a01
SH
/*
 * Mount a proc under @rootfs if proc self points to a pid other than
 * my own.  This is needed to have a known-good proc mount for setting
 * up LSMs both at container startup and attach.
 *
 * @rootfs : the rootfs where proc should be mounted; NULL is treated like
 *           the empty string
 *
 * Returns < 0 on failure, 0 if the correct proc was already mounted
 * and 1 if a new proc was mounted.
 *
 * NOTE: not to be called from inside the container namespace!
 */
int mount_proc_if_needed(const char *rootfs)
{
	char path[MAXPATHLEN];
	char link[20];
	int link_to_pid, linklen, ret;
	int mypid;

	if (!rootfs)
		rootfs = "";

	ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	/* readlink() does not terminate the buffer, so keep the last byte
	 * of the zeroed buffer in reserve for the null byte.
	 */
	memset(link, 0, sizeof(link));
	linklen = readlink(path, link, sizeof(link) - 1);
	mypid = (int)getpid();
	INFO("I am %d, /proc/self points to '%s'", mypid, link);

	ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
	if (ret < 0 || ret >= MAXPATHLEN) {
		SYSERROR("proc path name too long");
		return -1;
	}

	if (linklen < 0) /* /proc not mounted */
		goto domount;

	if (lxc_safe_int(link, &link_to_pid) < 0)
		return -1;

	if (link_to_pid != mypid) {
		/* wrong /procs mounted */
		umount2(path, MNT_DETACH); /* ignore failure */
		goto domount;
	}

	/* the right proc is already mounted */
	return 0;

domount:
	if (!strcmp(rootfs, "")) /* no rootfs given */
		ret = mount("proc", path, "proc", 0, NULL);
	else
		ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);

	if (ret < 0)
		return -1;

	INFO("Mounted /proc in container for security transition");
	return 1;
}
69aeabac 1815
/*
 * Open /dev/null for reading and writing.
 * Returns the fd, or a negative value (after logging) on failure.
 */
int open_devnull(void)
{
	int fd;

	fd = open("/dev/null", O_RDWR);
	if (fd >= 0)
		return fd;

	SYSERROR("Can't open /dev/null");
	return fd;
}
69aeabac 1825
f8dd0275
AM
/*
 * Duplicate @fd onto file descriptors 0, 1 and 2.
 * Returns 0 on success, -1 if @fd is negative or any dup2() fails.
 */
int set_stdfds(int fd)
{
	int target;

	if (fd < 0)
		return -1;

	for (target = 0; target <= 2; target++) {
		if (dup2(fd, target) < 0)
			return -1;
	}

	return 0;
}
1840
/*
 * Point file descriptors 0, 1 and 2 at /dev/null.
 * Returns 0 on success and -1 on failure.
 */
int null_stdfds(void)
{
	int fd, ret;

	fd = open_devnull();
	if (fd < 0)
		return -1;

	ret = set_stdfds(fd);
	/* The duplicates stay open; only the temporary fd is dropped. */
	close(fd);

	return ret;
}
ccb4cabe
SH
1853
/*
 * Return the number of lines in file @fn, or -1 if it cannot be opened.
 */
int lxc_count_file_lines(const char *fn)
{
	char *buf = NULL;
	size_t cap = 0;
	int count;
	FILE *f;

	f = fopen_cloexec(fn, "r");
	if (!f)
		return -1;

	/* getline() reuses and grows buf as needed; we only count calls. */
	for (count = 0; getline(&buf, &cap, f) != -1; count++)
		;

	free(buf);
	fclose(f);
	return count;
}
1adbd020 1875
25086a5f
CB
1876void *lxc_strmmap(void *addr, size_t length, int prot, int flags, int fd,
1877 off_t offset)
1adbd020
CB
1878{
1879 void *tmp = NULL, *overlap = NULL;
1880
1881 /* We establish an anonymous mapping that is one byte larger than the
1882 * underlying file. The pages handed to us are zero filled. */
1883 tmp = mmap(addr, length + 1, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1884 if (tmp == MAP_FAILED)
a1e5280d 1885 return tmp;
1adbd020
CB
1886
1887 /* Now we establish a fixed-address mapping starting at the address we
1888 * received from our anonymous mapping and replace all bytes excluding
1889 * the additional \0-byte with the file. This allows us to use normal
a1e5280d 1890 * string-handling functions. */
1adbd020
CB
1891 overlap = mmap(tmp, length, prot, MAP_FIXED | flags, fd, offset);
1892 if (overlap == MAP_FAILED)
a1e5280d 1893 munmap(tmp, length + 1);
1adbd020 1894
1adbd020
CB
1895 return overlap;
1896}
1897
/*
 * Unmap @length bytes at @addr plus one extra byte, matching the extra
 * byte that lxc_strmmap() maps. Returns the result of munmap().
 */
int lxc_strmunmap(void *addr, size_t length)
{
	size_t maplen = length + 1;

	return munmap(addr, maplen);
}
330ae3d3
CB
1902
1903/* Check whether a signal is blocked by a process. */
de3c491b 1904/* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
eab15c1e 1905#define __PROC_STATUS_LEN (5 + (LXC_NUMSTRLEN64) + 7 + 1)
330ae3d3
CB
1906bool task_blocking_signal(pid_t pid, int signal)
1907{
1908 bool bret = false;
1909 char *line = NULL;
1910 long unsigned int sigblk = 0;
1911 size_t n = 0;
1912 int ret;
1913 FILE *f;
1914
de3c491b 1915 char status[__PROC_STATUS_LEN];
330ae3d3 1916
de3c491b
CB
1917 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1918 if (ret < 0 || ret >= __PROC_STATUS_LEN)
330ae3d3
CB
1919 return bret;
1920
1921 f = fopen(status, "r");
1922 if (!f)
1923 return bret;
1924
1925 while (getline(&line, &n, f) != -1) {
1926 if (!strncmp(line, "SigBlk:\t", 8))
1927 if (sscanf(line + 8, "%lx", &sigblk) != 1)
1928 goto out;
1929 }
1930
1931 if (sigblk & signal)
1932 bret = true;
1933
1934out:
1935 free(line);
1936 fclose(f);
1937 return bret;
1938}
000dfda7
CB
1939
/*
 * Grow the NULL-terminated pointer array *@list by one slot.
 *
 * On success *@list may have moved, the new slot and the slot after it are
 * both NULL, and the index of the new slot is returned for the caller to
 * fill in. The list therefore stays NULL-terminated even if the caller
 * fails before storing an entry. *@list may be NULL for an empty list.
 *
 * Returns -1 and leaves *@list untouched if memory cannot be allocated.
 */
static int lxc_append_null_to_list(void ***list)
{
	int newentry = 0;
	void **tmp;

	if (*list)
		while ((*list)[newentry])
			newentry++;

	/* One slot for the new entry plus one for the terminator. */
	tmp = realloc(*list, (newentry + 2) * sizeof(*tmp));
	if (!tmp)
		return -1;

	*list = tmp;
	/* For a previously empty list this slot is uninitialised memory. */
	(*list)[newentry] = NULL;
	(*list)[newentry + 1] = NULL;

	return newentry;
}
1959
/*
 * Append a copy of @entry to the NULL-terminated string array *@list.
 * Returns 0 on success and -1 if growing the list or copying the string
 * failed.
 */
int lxc_append_string(char ***list, char *entry)
{
	int slot;
	char *dup;

	/* Make room first; this also tells us where the copy belongs. */
	slot = lxc_append_null_to_list((void ***)list);
	if (slot < 0)
		return -1;

	dup = strdup(entry);
	if (!dup)
		return -1;

	(*list)[slot] = dup;
	return 0;
}
a687256f
CB
1977
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC and return the fd.
 *
 * Passing NULL or the empty string for @ns opens the /proc/<pid>/ns
 * directory itself, which allows callers to check whether the kernel
 * supports namespaces at all.
 *
 * Returns -1 if the path does not fit the buffer or open() fails.
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
	int ret;
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char path[__NS_PATH_LEN];
	const char *sep = "", *name = "";

	if (ns && strcmp(ns, "") != 0) {
		sep = "/";
		name = ns;
	}

	ret = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid, sep, name);
	if (ret < 0 || (size_t)ret >= __NS_PATH_LEN)
		return -1;

	return open(path, O_RDONLY | O_CLOEXEC);
}
6bc2eafe
CB
1997
/*
 * Convert the whole string @numstr to an unsigned int.
 *
 * The base is detected from the prefix as by strtoul() with base 0. On
 * success 0 is returned and the value stored in *@converted. Otherwise a
 * negative errno value is returned and *@converted is left untouched:
 * -EINVAL for empty input, trailing garbage or a negative number, -ERANGE
 * (or the errno set by strtoul()) when the value does not fit.
 */
int lxc_safe_uint(const char *numstr, unsigned int *converted)
{
	char *err = NULL;
	const char *p = numstr;
	unsigned long int uli;

	/*
	 * strtoul() accepts a leading minus sign and negates the result in
	 * the unsigned type, so "-1" would come back as ULONG_MAX. Skip the
	 * whitespace strtoul() skips and refuse negative numbers outright.
	 */
	while (*p == ' ' || (*p >= '\t' && *p <= '\r'))
		p++;
	if (*p == '-')
		return -EINVAL;

	errno = 0;
	uli = strtoul(numstr, &err, 0);
	if (errno > 0)
		return -errno;

	if (!err || err == numstr || *err != '\0')
		return -EINVAL;

	if (uli > UINT_MAX)
		return -ERANGE;

	*converted = (unsigned int)uli;
	return 0;
}
b5f845e7
CB
2017
/*
 * Convert the whole string @numstr to an int.
 *
 * The base is detected from the prefix as by strtol() with base 0. On
 * success 0 is returned and the value stored in *@converted. Otherwise a
 * negative errno value is returned and *@converted is left untouched:
 * -EINVAL for empty input or trailing garbage, -ERANGE (or the errno set
 * by strtol()) when the value does not fit into an int.
 */
int lxc_safe_int(const char *numstr, int *converted)
{
	char *err = NULL;
	signed long int sli;

	errno = 0;
	sli = strtol(numstr, &err, 0);
	if (errno > 0)
		return -errno;

	if (!err || err == numstr || *err != '\0')
		return -EINVAL;

	/* long may be wider than int, so check both ends of the range. */
	if (sli > INT_MAX || sli < INT_MIN)
		return -ERANGE;

	*converted = (int)sli;
	return 0;
}
8c57d930
CB
2037
/*
 * Convert the whole string @numstr to a long int.
 *
 * The base is detected from the prefix as by strtol() with base 0. On
 * success 0 is returned and the value stored in *@converted. Otherwise a
 * negative errno value is returned and *@converted is left untouched:
 * -EINVAL for empty input or trailing garbage, and the negated errno set
 * by strtol() (ERANGE for values that do not fit into a long).
 */
int lxc_safe_long(const char *numstr, long int *converted)
{
	char *err = NULL;
	signed long int sli;

	/* strtol() reports out-of-range input through errno, so no separate
	 * comparison against LONG_MAX is needed (it could never be true).
	 */
	errno = 0;
	sli = strtol(numstr, &err, 0);
	if (errno > 0)
		return -errno;

	if (!err || err == numstr || *err != '\0')
		return -EINVAL;

	*converted = sli;
	return 0;
}
dbaf55a3
CB
2057
/*
 * Switch to @gid and then to @uid, logging each step.
 * Returns 0 on success or the negated errno of the call that failed.
 */
int lxc_switch_uid_gid(uid_t uid, gid_t gid)
{
	int saved_errno;

	if (setgid(gid) < 0) {
		/* Logging may itself change errno, so capture it first. */
		saved_errno = errno;
		SYSERROR("Failed to switch to gid %d.", (int)gid);
		return -saved_errno;
	}
	NOTICE("Switched to gid %d.", (int)gid);

	if (setuid(uid) < 0) {
		saved_errno = errno;
		SYSERROR("Failed to switch to uid %d.", (int)uid);
		return -saved_errno;
	}
	NOTICE("Switched to uid %d.", (int)uid);

	return 0;
}
2074
/*
 * Simple convenience function which enables uniform logging.
 * Calls setgroups(@size, @list) and returns 0 on success or the negated
 * errno of the failed call.
 */
int lxc_setgroups(int size, gid_t list[])
{
	if (setgroups(size, list) < 0) {
		/* Logging may itself change errno, so capture it first. */
		int saved_errno = errno;

		SYSERROR("Failed to setgroups().");
		return -saved_errno;
	}
	NOTICE("Dropped additional groups.");

	return 0;
}