]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/utils.c
Merge pull request #2653 from brauner/2018-09-27/minor_tweaks
[mirror_lxc.git] / src / lxc / utils.c
CommitLineData
e3642c43
DL
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
e3642c43
DL
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e3642c43
DL
22 */
23
d38dd64a
CB
24#ifndef _GNU_SOURCE
25#define _GNU_SOURCE 1
26#endif
7935833c 27#define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
643c1984 28#include <ctype.h>
a1e5280d 29#include <dirent.h>
e3642c43 30#include <errno.h>
a1e5280d 31#include <fcntl.h>
dbaf55a3 32#include <grp.h>
7935833c 33#include <inttypes.h>
a1e5280d 34#include <libgen.h>
b467714b 35#include <pthread.h>
d983b93c 36#include <stddef.h>
a1e5280d
CB
37#include <stdio.h>
38#include <stdlib.h>
61a1d519 39#include <string.h>
e3642c43 40#include <sys/mman.h>
6e4bb2e0 41#include <sys/mount.h>
a1e5280d
CB
42#include <sys/param.h>
43#include <sys/prctl.h>
44#include <sys/stat.h>
9be53773
SH
45#include <sys/types.h>
46#include <sys/wait.h>
d38dd64a 47#include <unistd.h>
e3642c43 48
d38dd64a 49#include "config.h"
e3642c43 50#include "log.h"
025ed0f3 51#include "lxclock.h"
51d0854c 52#include "namespace.h"
e3db0162 53#include "parse.h"
981f6029 54#include "utils.h"
e3642c43 55
43f984ea
DJ
56#ifndef HAVE_STRLCPY
57#include "include/strlcpy.h"
58#endif
59
bd583214
DJ
60#ifndef HAVE_STRLCAT
61#include "include/strlcat.h"
62#endif
63
4928c718
SG
64#ifndef O_PATH
65#define O_PATH 010000000
66#endif
67
68#ifndef O_NOFOLLOW
69#define O_NOFOLLOW 00400000
70#endif
71
ac2cecc4 72lxc_log_define(utils, lxc);
e3642c43 73
4295c5de
SH
74/*
75 * if path is btrfs, tries to remove it and any subvolumes beneath it
76 */
77extern bool btrfs_try_remove_subvol(const char *path);
78
/*
 * Recursively delete the contents of @dirname and then @dirname itself.
 *
 * @dirname: directory to remove
 * @pdev:    st_dev value that entries are compared against when @onedev is set
 * @exclude: optional entry name; at the top level (@level == 0) a matching
 *           entry is only removed with rmdir()/unlink() and never descended
 *           into
 * @level:   recursion depth, 0 for the initial call
 * @onedev:  when true, entries whose st_dev differs from @pdev are not
 *           descended into; only a btrfs subvolume removal is attempted
 *
 * Returns 0 when nothing failed, -1 otherwise.
 */
static int _recursive_rmdir(const char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	char entry_path[MAXPATHLEN];
	bool kept_exclude = false;
	int errors = 0;
	struct dirent *ent;
	DIR *dp;

	dp = opendir(dirname);
	if (dp == NULL) {
		ERROR("failed to open %s", dirname);
		return -1;
	}

	for (ent = readdir(dp); ent != NULL; ent = readdir(dp)) {
		struct stat sb;
		int len;

		if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
			continue;

		len = snprintf(entry_path, MAXPATHLEN, "%s/%s", dirname, ent->d_name);
		if (len < 0 || len >= MAXPATHLEN) {
			ERROR("pathname too long");
			errors = 1;
			continue;
		}

		/* The excluded top-level entry is never walked recursively. */
		if (level == 0 && exclude != NULL && strcmp(ent->d_name, exclude) == 0) {
			if (rmdir(entry_path) < 0) {
				if (errno == ENOTEMPTY) {
					INFO("Not deleting snapshot %s", entry_path);
					kept_exclude = true;
				} else if (errno == ENOTDIR) {
					if (unlink(entry_path))
						INFO("Failed to remove %s", entry_path);
				} else {
					SYSERROR("Failed to rmdir %s", entry_path);
					errors = 1;
				}
			}
			continue;
		}

		if (lstat(entry_path, &sb)) {
			ERROR("Failed to stat %s", entry_path);
			errors = 1;
			continue;
		}

		if (onedev && sb.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(entry_path))
				INFO("Removed btrfs subvolume at %s\n", entry_path);
			continue;
		}

		if (!S_ISDIR(sb.st_mode)) {
			if (unlink(entry_path) < 0) {
				SYSERROR("Failed to delete %s", entry_path);
				errors = 1;
			}
			continue;
		}

		if (_recursive_rmdir(entry_path, pdev, exclude, level + 1, onedev) < 0)
			errors = 1;
	}

	/* A directory that still holds the excluded entry is allowed to stay. */
	if (rmdir(dirname) < 0) {
		if (!btrfs_try_remove_subvol(dirname) && !kept_exclude) {
			ERROR("Failed to delete %s", dirname);
			errors = 1;
		}
	}

	if (closedir(dp)) {
		ERROR("Failed to close directory %s", dirname);
		errors = 1;
	}

	return errors ? -1 : 0;
}
170
29a11a7f
CB
171/* In overlayfs, st_dev is unreliable. So on overlayfs we don't do the
172 * lxc_rmdir_onedev()
0cc417b2
SH
173 */
174static bool is_native_overlayfs(const char *path)
175{
29a11a7f
CB
176 if (has_fs_type(path, OVERLAY_SUPER_MAGIC) ||
177 has_fs_type(path, OVERLAYFS_SUPER_MAGIC))
0cc417b2 178 return true;
29a11a7f 179
0cc417b2
SH
180 return false;
181}
182
/*
 * Remove @path recursively. Entries on a different device than @path are not
 * descended into, unless @path is on overlayfs, where the device check is
 * switched off. @exclude is handed through to _recursive_rmdir().
 *
 * A @path that does not exist counts as success.
 *
 * Returns 0 on success, -1 if there were any failures.
 */
extern int lxc_rmdir_onedev(const char *path, const char *exclude)
{
	struct stat sb;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &sb) >= 0)
		return _recursive_rmdir(path, sb.st_dev, exclude, 0, onedev);

	if (errno == ENOENT)
		return 0;

	ERROR("Failed to stat %s", path);
	return -1;
}
202
/*
 * Parse @arg as an unsigned 16 bit number in the given @base and store it in
 * @val. Borrowed from iproute2.
 *
 * The whole string must be consumed and the value must fit into 16 bits.
 *
 * Returns 0 on success, -1 on any parse error (@val is left untouched then).
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	char *end = NULL;
	unsigned long parsed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	errno = 0;
	parsed = strtoul(arg, &end, base);

	/* Conversion error or out-of-range input. */
	if (errno != 0 || parsed > 0xFFFF)
		return -1;

	/* No digits consumed, or trailing garbage after the number. */
	if (end == NULL || end == arg || *end != '\0')
		return -1;

	*val = parsed;
	return 0;
}
221
/*
 * Create @dir and all of its missing parent directories with permissions
 * @mode, like "mkdir -p".
 *
 * The path is walked component by component; each successively longer prefix
 * is passed to mkdir(). Already existing directories (EEXIST) are not an
 * error.
 *
 * Returns 0 on success, -1 on failure (allocation failure or a mkdir() error
 * other than EEXIST).
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* Skip the separator(s), then advance past the next component. */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		makeme = strndup(orig, dir - orig);
		if (!makeme)
			/* Previously the NULL result was dereferenced below. */
			return -1;

		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
2a59a681 245
44b9ae4b 246char *get_rundir()
9e60f51d 247{
97a696c6
SG
248 char *rundir;
249 const char *homedir;
9650c735 250 struct stat sb;
9e60f51d 251
b14fc100 252 if (stat(RUNTIME_PATH, &sb) < 0)
9650c735 253 return NULL;
9650c735
TA
254
255 if (geteuid() == sb.st_uid || getegid() == sb.st_gid) {
c580b8d2 256 rundir = strdup(RUNTIME_PATH);
d6470e71
SG
257 return rundir;
258 }
97a696c6
SG
259
260 rundir = getenv("XDG_RUNTIME_DIR");
44b9ae4b
SG
261 if (rundir) {
262 rundir = strdup(rundir);
263 return rundir;
264 }
97a696c6 265
44b9ae4b
SG
266 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
267 homedir = getenv("HOME");
268 if (!homedir) {
269 ERROR("HOME isn't set in the environment.");
270 return NULL;
97a696c6
SG
271 }
272
44b9ae4b 273 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
b14fc100 274 if (!rundir)
275 return NULL;
276
44b9ae4b
SG
277 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
278
9e60f51d
DE
279 return rundir;
280}
281
9be53773
SH
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted.
 *
 * Returns 0 only if the child exited normally with status 0. Returns -1 when
 * waitpid() fails or the child did not exit cleanly.
 */
int wait_for_pid(pid_t pid)
{
	int status;

	for (;;) {
		pid_t reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
c797a220
CS
303
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted.
 *
 * Returns the raw wait status as filled in by waitpid() (to be decoded with
 * the W*() macros), or -1 if waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;

	for (;;) {
		pid_t reaped = waitpid(pid, &status, 0);

		if (reaped == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (reaped == pid)
			return status;
	}
}
92f023dc 322
3ce74686
SH
323#if HAVE_LIBGNUTLS
324#include <gnutls/gnutls.h>
325#include <gnutls/crypto.h>
41246cee
DE
326
/* Constructor: runs gnutls_global_init() once when the library is loaded. */
static void __attribute__((constructor)) gnutls_lxc_init(void)
{
	(void)gnutls_global_init();
}
332
3ce74686
SH
333int sha1sum_file(char *fnam, unsigned char *digest)
334{
335 char *buf;
336 int ret;
337 FILE *f;
338 long flen;
339
340 if (!fnam)
341 return -1;
b14fc100 342
025ed0f3 343 f = fopen_cloexec(fnam, "r");
7be677a8 344 if (!f) {
3ce74686
SH
345 SYSERROR("Error opening template");
346 return -1;
347 }
b14fc100 348
3ce74686
SH
349 if (fseek(f, 0, SEEK_END) < 0) {
350 SYSERROR("Error seeking to end of template");
dd1d77f9 351 fclose(f);
3ce74686
SH
352 return -1;
353 }
b14fc100 354
3ce74686
SH
355 if ((flen = ftell(f)) < 0) {
356 SYSERROR("Error telling size of template");
dd1d77f9 357 fclose(f);
3ce74686
SH
358 return -1;
359 }
b14fc100 360
3ce74686
SH
361 if (fseek(f, 0, SEEK_SET) < 0) {
362 SYSERROR("Error seeking to start of template");
dd1d77f9 363 fclose(f);
3ce74686
SH
364 return -1;
365 }
b14fc100 366
3ce74686
SH
367 if ((buf = malloc(flen+1)) == NULL) {
368 SYSERROR("Out of memory");
dd1d77f9 369 fclose(f);
3ce74686
SH
370 return -1;
371 }
b14fc100 372
3ce74686
SH
373 if (fread(buf, 1, flen, f) != flen) {
374 SYSERROR("Failure reading template");
375 free(buf);
dd1d77f9 376 fclose(f);
3ce74686
SH
377 return -1;
378 }
b14fc100 379
dd1d77f9 380 if (fclose(f) < 0) {
3ce74686
SH
381 SYSERROR("Failre closing template");
382 free(buf);
383 return -1;
384 }
b14fc100 385
3ce74686
SH
386 buf[flen] = '\0';
387 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
388 free(buf);
389 return ret;
390}
391#endif
61a1d519 392
8bd8018e 393struct lxc_popen_FILE *lxc_popen(const char *command)
ebec9176 394{
3f323207 395 int ret;
ebec9176
AM
396 int pipe_fds[2];
397 pid_t child_pid;
8bd8018e 398 struct lxc_popen_FILE *fp = NULL;
ebec9176 399
8bd8018e
CB
400 ret = pipe2(pipe_fds, O_CLOEXEC);
401 if (ret < 0)
ebec9176 402 return NULL;
ebec9176
AM
403
404 child_pid = fork();
8bd8018e
CB
405 if (child_pid < 0)
406 goto on_error;
407
408 if (!child_pid) {
409 sigset_t mask;
410
411 close(pipe_fds[0]);
412
413 /* duplicate stdout */
414 if (pipe_fds[1] != STDOUT_FILENO)
415 ret = dup2(pipe_fds[1], STDOUT_FILENO);
416 else
417 ret = fcntl(pipe_fds[1], F_SETFD, 0);
418 if (ret < 0) {
419 close(pipe_fds[1]);
03f618af 420 _exit(EXIT_FAILURE);
3f323207
CB
421 }
422
8bd8018e
CB
423 /* duplicate stderr */
424 if (pipe_fds[1] != STDERR_FILENO)
425 ret = dup2(pipe_fds[1], STDERR_FILENO);
426 else
427 ret = fcntl(pipe_fds[1], F_SETFD, 0);
428 close(pipe_fds[1]);
429 if (ret < 0)
03f618af 430 _exit(EXIT_FAILURE);
8bd8018e
CB
431
432 /* unblock all signals */
433 ret = sigfillset(&mask);
434 if (ret < 0)
03f618af 435 _exit(EXIT_FAILURE);
8bd8018e 436
b467714b 437 ret = pthread_sigmask(SIG_UNBLOCK, &mask, NULL);
8bd8018e 438 if (ret < 0)
03f618af 439 _exit(EXIT_FAILURE);
8bd8018e
CB
440
441 execl("/bin/sh", "sh", "-c", command, (char *)NULL);
03f618af 442 _exit(127);
ebec9176
AM
443 }
444
8bd8018e
CB
445 close(pipe_fds[1]);
446 pipe_fds[1] = -1;
ebec9176 447
8bd8018e
CB
448 fp = malloc(sizeof(*fp));
449 if (!fp)
450 goto on_error;
b14fc100 451
7e50ec0b 452 memset(fp, 0, sizeof(*fp));
ebec9176
AM
453
454 fp->child_pid = child_pid;
8bd8018e 455 fp->pipe = pipe_fds[0];
ebec9176 456
7e50ec0b
CB
457 /* From now on, closing fp->f will also close fp->pipe. So only ever
458 * call fclose(fp->f).
459 */
8bd8018e
CB
460 fp->f = fdopen(pipe_fds[0], "r");
461 if (!fp->f)
462 goto on_error;
ebec9176 463
8bd8018e 464 return fp;
ebec9176 465
8bd8018e 466on_error:
7e50ec0b
CB
467 /* We can only close pipe_fds[0] if fdopen() didn't succeed or wasn't
468 * called yet. Otherwise the fd belongs to the file opened by fdopen()
469 * since it isn't dup()ed.
470 */
471 if (fp && !fp->f && pipe_fds[0] >= 0)
8bd8018e
CB
472 close(pipe_fds[0]);
473
474 if (pipe_fds[1] >= 0)
475 close(pipe_fds[1]);
ebec9176 476
7e50ec0b
CB
477 if (fp && fp->f)
478 fclose(fp->f);
479
480 if (fp)
481 free(fp);
482
ebec9176
AM
483 return NULL;
484}
485
8bd8018e 486int lxc_pclose(struct lxc_popen_FILE *fp)
ebec9176 487{
ebec9176 488 pid_t wait_pid;
8bd8018e 489 int wstatus = 0;
ebec9176 490
8bd8018e 491 if (!fp)
ebec9176 492 return -1;
ebec9176
AM
493
494 do {
8bd8018e
CB
495 wait_pid = waitpid(fp->child_pid, &wstatus, 0);
496 } while (wait_pid < 0 && errno == EINTR);
ebec9176 497
8bd8018e
CB
498 fclose(fp->f);
499 free(fp);
500
501 if (wait_pid < 0)
ebec9176 502 return -1;
ebec9176
AM
503
504 return wstatus;
505}
506
508c263e
SH
/*
 * Produce a seed for rand(), preferably read from /dev/urandom.
 *
 * The seed starts out as time + pid and is overwritten with bytes from
 * /dev/urandom when that can be opened and read. With @srand_it set the seed
 * is also fed to srand().
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom = fopen("/dev/urandom", "r");

	if (urandom != NULL) {
		size_t items = fread(&seed, sizeof(seed), 1, urandom);

		if (items != 1)
			SYSDEBUG("unable to fread /dev/urandom, fallback to time+pid rand seed");

		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
5d897655
SH
529
530uid_t get_ns_uid(uid_t orig)
531{
532 char *line = NULL;
533 size_t sz = 0;
534 uid_t nsid, hostid, range;
535 FILE *f = fopen("/proc/self/uid_map", "r");
536 if (!f)
537 return 0;
538
539 while (getline(&line, &sz, f) != -1) {
540 if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
541 continue;
b14fc100 542
5d897655
SH
543 if (hostid <= orig && hostid + range > orig) {
544 nsid += orig - hostid;
545 goto found;
546 }
547 }
548
b962868f
CB
549 nsid = LXC_INVALID_UID;
550
551found:
552 fclose(f);
553 free(line);
554 return nsid;
555}
556
557gid_t get_ns_gid(gid_t orig)
558{
559 char *line = NULL;
560 size_t sz = 0;
561 gid_t nsid, hostid, range;
562 FILE *f = fopen("/proc/self/gid_map", "r");
563 if (!f)
564 return 0;
565
566 while (getline(&line, &sz, f) != -1) {
567 if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
568 continue;
569
570 if (hostid <= orig && hostid + range > orig) {
571 nsid += orig - hostid;
572 goto found;
573 }
574 }
575
576 nsid = LXC_INVALID_GID;
b14fc100 577
5d897655
SH
578found:
579 fclose(f);
580 free(line);
581 return nsid;
582}
c476bdce
SH
583
/*
 * Report whether @path exists and is a directory.
 *
 * Any stat() failure - not only a missing path - yields false.
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode);
}
93c379f0
ÇO
596
/* Note we don't use SHA-1 here as we don't want to depend on HAVE_GNUTLS.
 * FNV has good anti collision properties and we're not worried
 * about pre-image resistance or one-way-ness, we're just trying to make
 * the name unique in the 108 bytes of space we have.
 *
 * Folds the @len bytes at @buf into the running hash @hval: every byte is
 * xor-ed into the hash, which is then multiplied by the 64 bit FNV prime.
 *
 * Returns the updated hash; with @len == 0 that is @hval unchanged.
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* Multiply by the 64 bit FNV prime mod 2^64. This is the
		 * shift-and-add sum 1 + 2^1 + 2^4 + 2^5 + 2^7 + 2^8 + 2^40
		 * written out as a single constant.
		 */
		hval *= UINT64_C(0x100000001b3);
	}

	return hval;
}
2c6f3fc9 620
f6310f18 621bool is_shared_mountpoint(const char *path)
2c6f3fc9 622{
f6310f18 623 char buf[LXC_LINELEN];
2c6f3fc9
SH
624 FILE *f;
625 int i;
f6310f18 626 char *p, *p2;
2c6f3fc9
SH
627
628 f = fopen("/proc/self/mountinfo", "r");
629 if (!f)
630 return 0;
b14fc100 631
eab15c1e
CB
632 while (fgets(buf, LXC_LINELEN, f)) {
633 for (p = buf, i = 0; p && i < 4; i++)
634 p = strchr(p + 1, ' ');
2c6f3fc9
SH
635 if (!p)
636 continue;
b14fc100 637
eab15c1e 638 p2 = strchr(p + 1, ' ');
2c6f3fc9
SH
639 if (!p2)
640 continue;
b14fc100 641
2c6f3fc9 642 *p2 = '\0';
f6310f18
LT
643 if (strcmp(p + 1, path) == 0) {
644 /* This is the path. Is it shared? */
eab15c1e 645 p = strchr(p2 + 1, ' ');
2c6f3fc9
SH
646 if (p && strstr(p, "shared:")) {
647 fclose(f);
f6310f18 648 return true;
2c6f3fc9
SH
649 }
650 }
651 }
b14fc100 652
2c6f3fc9 653 fclose(f);
f6310f18
LT
654 return false;
655}
656
/*
 * Detect whether / is mounted MS_SHARED. The only way I know of to
 * check that is through /proc/self/mountinfo.
 * I'm only checking for /. If the container rootfs or mount location
 * is MS_SHARED, but not '/', then you're out of luck - figuring that
 * out would be too much work to be worth it.
 *
 * Returns 1 if / is a shared mount point, 0 otherwise.
 */
int detect_shared_rootfs(void)
{
	return is_shared_mountpoint("/") ? 1 : 0;
}
0e6e3a41 670
37ef15bb
CB
/*
 * Enter namespace @ns (the entry name below /proc/<pid>/ns/) of process @pid
 * using setns().
 *
 * Returns true on success, false if the path does not fit, cannot be opened,
 * or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns)
{
	char nspath[MAXPATHLEN];
	int len, nsfd, rc;

	/* Switch to new ns */
	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("Failed to open %s", nspath);
		return false;
	}

	rc = setns(nsfd, 0);
	if (rc)
		SYSERROR("Failed to set process %d to %s of %d.", pid, ns, nsfd);

	/* The fd is no longer needed, whether or not setns() worked. */
	close(nsfd);
	return rc == 0;
}
697
b7f954bb
SH
/*
 * looking at fs/proc_namespace.c, it appears we can
 * actually expect the rootfs entry to very specifically contain
 * " - rootfs rootfs "
 * IIUC, so long as we've chrooted so that rootfs is not our root,
 * the rootfs entry should always be skipped in mountinfo contents.
 *
 * Returns true if /proc/self/mountinfo has such an entry for "/", false
 * otherwise (including when mountinfo cannot be opened).
 */
bool detect_ramfs_rootfs(void)
{
	bool on_ramfs = false;
	char *line = NULL;
	size_t cap = 0;
	FILE *mountinfo;

	mountinfo = fopen("/proc/self/mountinfo", "r");
	if (!mountinfo)
		return false;

	while (!on_ramfs && getline(&line, &cap, mountinfo) != -1) {
		char *field = line, *field_end, *sep;
		int skipped;

		/* Step over the first four blanks; the mount point follows. */
		for (skipped = 0; field && skipped < 4; skipped++)
			field = strchr(field + 1, ' ');
		if (!field)
			continue;

		field_end = strchr(field + 1, ' ');
		if (!field_end)
			continue;

		*field_end = '\0';
		if (strcmp(field + 1, "/") != 0)
			continue;

		/* This is '/'. Is it the ramfs? */
		sep = strchr(field_end + 1, '-');
		if (sep && strncmp(sep, "- rootfs rootfs ", 16) == 0)
			on_ramfs = true;
	}

	free(line);
	fclose(mountinfo);

	if (on_ramfs)
		INFO("Rootfs is located on ramfs");

	return on_ramfs;
}
744
37ef15bb
CB
745char *on_path(const char *cmd, const char *rootfs)
746{
84c5549b 747 char *entry = NULL, *path = NULL;
0e6e3a41
SG
748 char cmdpath[MAXPATHLEN];
749 int ret;
750
751 path = getenv("PATH");
752 if (!path)
8afb3e61 753 return NULL;
0e6e3a41
SG
754
755 path = strdup(path);
756 if (!path)
8afb3e61 757 return NULL;
0e6e3a41 758
37ef15bb 759 lxc_iterate_parts (entry, path, ":") {
9d9c111c 760 if (rootfs)
37ef15bb
CB
761 ret = snprintf(cmdpath, MAXPATHLEN, "%s/%s/%s", rootfs,
762 entry, cmd);
9d9c111c
SH
763 else
764 ret = snprintf(cmdpath, MAXPATHLEN, "%s/%s", entry, cmd);
0e6e3a41 765 if (ret < 0 || ret >= MAXPATHLEN)
84c5549b 766 continue;
0e6e3a41
SG
767
768 if (access(cmdpath, X_OK) == 0) {
769 free(path);
8afb3e61 770 return strdup(cmdpath);
0e6e3a41 771 }
0e6e3a41
SG
772 }
773
774 free(path);
8afb3e61 775 return NULL;
0e6e3a41 776}
76a26f55 777
12983ba4
SH
/* Report whether /proc/self/ns/cgroup exists, as judged by file_exists(). */
bool cgns_supported(void)
{
	bool have_cgroup_ns = file_exists("/proc/self/ns/cgroup");

	return have_cgroup_ns;
}
782
9d9c111c
SH
783/* historically lxc-init has been under /usr/lib/lxc and under
784 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
785 */
786char *choose_init(const char *rootfs)
787{
788 char *retv = NULL;
370ec268
SF
789 const char *empty = "",
790 *tmp;
9d9c111c 791 int ret, env_set = 0;
9d9c111c
SH
792
793 if (!getenv("PATH")) {
794 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
795 SYSERROR("Failed to setenv");
b14fc100 796
9d9c111c
SH
797 env_set = 1;
798 }
799
800 retv = on_path("init.lxc", rootfs);
801
802 if (env_set) {
803 if (unsetenv("PATH"))
804 SYSERROR("Failed to unsetenv");
805 }
806
807 if (retv)
808 return retv;
809
810 retv = malloc(PATH_MAX);
811 if (!retv)
812 return NULL;
813
814 if (rootfs)
370ec268 815 tmp = rootfs;
9d9c111c 816 else
370ec268
SF
817 tmp = empty;
818
819 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
9d9c111c
SH
820 if (ret < 0 || ret >= PATH_MAX) {
821 ERROR("pathname too long");
822 goto out1;
823 }
b14fc100 824
e57cd7e9 825 if (access(retv, X_OK) == 0)
9d9c111c
SH
826 return retv;
827
370ec268 828 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
9d9c111c
SH
829 if (ret < 0 || ret >= PATH_MAX) {
830 ERROR("pathname too long");
831 goto out1;
832 }
b14fc100 833
e57cd7e9 834 if (access(retv, X_OK) == 0)
9d9c111c
SH
835 return retv;
836
370ec268 837 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
9d9c111c
SH
838 if (ret < 0 || ret >= PATH_MAX) {
839 ERROR("pathname too long");
840 goto out1;
841 }
b14fc100 842
e57cd7e9 843 if (access(retv, X_OK) == 0)
9d9c111c
SH
844 return retv;
845
370ec268 846 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
9d9c111c
SH
847 if (ret < 0 || ret >= PATH_MAX) {
848 ERROR("pathname too long");
849 goto out1;
850 }
b14fc100 851
e57cd7e9 852 if (access(retv, X_OK) == 0)
9d9c111c
SH
853 return retv;
854
855 /*
856 * Last resort, look for the statically compiled init.lxc which we
857 * hopefully bind-mounted in.
858 * If we are called during container setup, and we get to this point,
859 * then the init.lxc.static from the host will need to be bind-mounted
860 * in. So we return NULL here to indicate that.
861 */
862 if (rootfs)
863 goto out1;
864
865 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
866 if (ret < 0 || ret >= PATH_MAX) {
867 WARN("Nonsense - name /lxc.init.static too long");
868 goto out1;
869 }
b14fc100 870
e57cd7e9 871 if (access(retv, X_OK) == 0)
9d9c111c
SH
872 return retv;
873
874out1:
875 free(retv);
876 return NULL;
877}
735f2c6e 878
6010a416
SG
879/*
880 * Given the '-t' template option to lxc-create, figure out what to
881 * do. If the template is a full executable path, use that. If it
882 * is something like 'sshd', then return $templatepath/lxc-sshd.
883 * On success return the template, on error return NULL.
884 */
885char *get_template_path(const char *t)
886{
887 int ret, len;
888 char *tpath;
889
890 if (t[0] == '/' && access(t, X_OK) == 0) {
891 tpath = strdup(t);
892 return tpath;
893 }
894
895 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
b14fc100 896
6010a416
SG
897 tpath = malloc(len);
898 if (!tpath)
899 return NULL;
b14fc100 900
6010a416
SG
901 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
902 if (ret < 0 || ret >= len) {
903 free(tpath);
904 return NULL;
905 }
b14fc100 906
6010a416
SG
907 if (access(tpath, X_OK) < 0) {
908 SYSERROR("bad template: %s", t);
909 free(tpath);
910 return NULL;
911 }
912
913 return tpath;
914}
0a4be28d 915
592fd47a
SH
/*
 * @path: a pathname where / replaced with '\0'.
 * @offsetp: pointer to int showing which path segment was last seen.
 *           Updated on return to reflect the next segment.
 * @fulllen: full original path length.
 * Returns a pointer to the next path segment, or NULL if done.
 *
 * When *@offsetp is already at or beyond @fulllen it is left untouched.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int pos = *offsetp;

	if (pos >= fulllen)
		return NULL;

	/* Run to the end of the current segment ... */
	for (; pos < fulllen; pos++)
		if (path[pos] == '\0')
			break;

	/* ... and then across the separator(s) that follow it. */
	for (; pos < fulllen; pos++)
		if (path[pos] != '\0')
			break;

	*offsetp = pos;
	if (pos >= fulllen)
		return NULL;

	return path + pos;
}
939
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * @subdir qualifies when it starts with @dir and either @dir ends in '/',
 * or the match ends on a path-component boundary of @subdir ('/' or end of
 * string). The code indexes @dir[@len - 1], so @len must be non-zero.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t sublen = strlen(subdir);

	if (sublen < len || strncmp(subdir, dir, len) != 0)
		return false;

	return dir[len - 1] == '/' || subdir[len] == '/' || sublen == len;
}
962
/*
 * Check if the open fd is a symlink. Return -ELOOP if it is. Return
 * -ENOENT if we couldn't fstat. Return 0 if the fd is ok.
 */
static int check_symlink(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) < 0)
		return -ENOENT;

	return S_ISLNK(sb.st_mode) ? -ELOOP : 0;
}
981
982/*
983 * Open a file or directory, provided that it contains no symlinks.
984 *
985 * CAVEAT: This function must not be used for other purposes than container
986 * setup before executing the container's init
987 */
988static int open_if_safe(int dirfd, const char *nextpath)
989{
990 int newfd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
1a0e70ac 991 if (newfd >= 0) /* Was not a symlink, all good. */
592fd47a
SH
992 return newfd;
993
994 if (errno == ELOOP)
995 return newfd;
996
997 if (errno == EPERM || errno == EACCES) {
1a0e70ac
CB
998 /* We're not root (cause we got EPERM) so try opening with
999 * O_PATH.
1000 */
592fd47a
SH
1001 newfd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
1002 if (newfd >= 0) {
1a0e70ac
CB
1003 /* O_PATH will return an fd for symlinks. We know
1004 * nextpath wasn't a symlink at last openat, so if fd is
1005 * now a link, then something * fishy is going on.
592fd47a
SH
1006 */
1007 int ret = check_symlink(newfd);
1008 if (ret < 0) {
1009 close(newfd);
1010 newfd = ret;
1011 }
1012 }
1013 }
1014
1015 return newfd;
1016}
1017
/*
 * Open a path intending for mounting, ensuring that the final path
 * is inside the container's rootfs.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target: path to be opened
 * @prefix_skip: a part of @target in which to ignore symbolic links. This
 *               would be the container's rootfs.
 *
 * The remainder of @target after @prefix_skip is opened one component at a
 * time through open_if_safe(), each relative to the previously opened one.
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int curlen = 0, dirfd, fulllen;
	char *copy, *slash;

	fulllen = strlen(target);

	/* make sure prefix-skip makes sense */
	if (prefix_skip && strlen(prefix_skip) > 0) {
		curlen = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, curlen)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
			      target, prefix_skip);
			return -EINVAL;
		}

		/*
		 * get_nextpath() expects the curlen argument to be
		 * on a (turned into \0) / or before it, so decrement
		 * curlen to make sure that happens
		 */
		if (curlen)
			curlen--;
	} else {
		prefix_skip = "/";
	}

	/* Make a copy of target which we can hack up, and tokenize it */
	copy = strdup(target);
	if (copy == NULL) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}

	for (slash = strchr(copy, '/'); slash; slash = strchr(slash + 1, '/'))
		*slash = '\0';

	dirfd = open(prefix_skip, O_RDONLY);

	/* Descend until a component fails to open or none are left. */
	while (dirfd >= 0) {
		int nextfd, saved_errno;
		char *component;

		component = get_nextpath(copy, &curlen, fulllen);
		if (component == NULL)
			break;

		nextfd = open_if_safe(dirfd, component);
		saved_errno = errno;
		close(dirfd);

		dirfd = nextfd;
		if (nextfd < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);
		}
	}

	free(copy);
	return dirfd;
}
1099
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @src, @fstype, @flags, @data: as for mount(2)
 * @dest:   mount target; opened without following symlinks below @rootfs
 * @rootfs: prefix of @dest in which symlinks are ignored, may be NULL
 *
 * The mount itself is performed on /proc/self/fd/<fd> paths of the safely
 * opened target (and of the source, for bind mounts with a relative @src).
 *
 * Returns 0 on success and a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int destfd, ret, saved_errno;
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	int srcfd = -1;
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if (flags & MS_BIND && src && src[0] != '/') {
		INFO("this is a relative bind mount");

		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		/* A return value equal to the buffer size already means the
		 * output was truncated, hence ">=" and not ">".
		 */
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Failed to create /proc/self/fd path for mount source");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			/* Keep the errno of the failed open for the caller. */
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}

		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);

		close(destfd);
		ERROR("Failed to create /proc/self/fd path for mount target");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);

	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		SYSERROR("Failed to mount %s onto %s", src ? src : "(null)", dest);
		return ret;
	}

	return 0;
}
1172
ced03a01
SH
1173/*
1174 * Mount a proc under @rootfs if proc self points to a pid other than
1175 * my own. This is needed to have a known-good proc mount for setting
1176 * up LSMs both at container startup and attach.
1177 *
1178 * @rootfs : the rootfs where proc should be mounted
1179 *
1180 * Returns < 0 on failure, 0 if the correct proc was already mounted
1181 * and 1 if a new proc was mounted.
f267d666
BP
1182 *
1183 * NOTE: not to be called from inside the container namespace!
ced03a01 1184 */
943144d9 1185int lxc_mount_proc_if_needed(const char *rootfs)
ced03a01
SH
1186{
1187 char path[MAXPATHLEN];
6b1ba5d6 1188 int link_to_pid, linklen, mypid, ret;
40464e8a 1189 char link[INTTYPE_TO_STRLEN(pid_t)] = {0};
ced03a01
SH
1190
1191 ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
1192 if (ret < 0 || ret >= MAXPATHLEN) {
1193 SYSERROR("proc path name too long");
1194 return -1;
1195 }
fc2ad9dc 1196
979a0d93 1197 linklen = readlink(path, link, sizeof(link));
fc2ad9dc 1198
ced03a01 1199 ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
d539a2b2
CB
1200 if (ret < 0 || ret >= MAXPATHLEN) {
1201 SYSERROR("proc path name too long");
1202 return -1;
1203 }
fc2ad9dc
CB
1204
1205 /* /proc not mounted */
1206 if (linklen < 0) {
1207 if (mkdir(path, 0755) && errno != EEXIST)
1208 return -1;
b14fc100 1209
ced03a01 1210 goto domount;
979a0d93 1211 } else if (linklen >= sizeof(link)) {
6b1ba5d6
CB
1212 link[linklen - 1] = '\0';
1213 ERROR("readlink returned truncated content: \"%s\"", link);
1214 return -1;
fc2ad9dc
CB
1215 }
1216
0059379f 1217 mypid = lxc_raw_getpid();
6b1ba5d6
CB
1218 INFO("I am %d, /proc/self points to \"%s\"", mypid, link);
1219
2d036cca
CB
1220 if (lxc_safe_int(link, &link_to_pid) < 0)
1221 return -1;
fc2ad9dc 1222
6b1ba5d6
CB
1223 /* correct procfs is already mounted */
1224 if (link_to_pid == mypid)
1225 return 0;
fc2ad9dc 1226
6b1ba5d6
CB
1227 ret = umount2(path, MNT_DETACH);
1228 if (ret < 0)
1229 WARN("failed to umount \"%s\" with MNT_DETACH", path);
ced03a01
SH
1230
1231domount:
fc2ad9dc 1232 /* rootfs is NULL */
6b1ba5d6 1233 if (!strcmp(rootfs, ""))
f267d666
BP
1234 ret = mount("proc", path, "proc", 0, NULL);
1235 else
1236 ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);
f267d666 1237 if (ret < 0)
ced03a01 1238 return -1;
f267d666 1239
fc2ad9dc 1240 INFO("mounted /proc in container for security transition");
ced03a01
SH
1241 return 1;
1242}
69aeabac 1243
/* Open /dev/null read-write.
 *
 * Returns the new file descriptor, or the negative value returned by open()
 * after logging the failure.
 */
int open_devnull(void)
{
	int nullfd;

	nullfd = open("/dev/null", O_RDWR);
	if (nullfd >= 0)
		return nullfd;

	SYSERROR("Can't open /dev/null");
	return nullfd;
}
69aeabac 1253
f8dd0275
AM
/* Duplicate @fd onto stdin, stdout and stderr (in that order).
 *
 * Returns 0 on success and -1 if @fd is negative or any dup2() fails. @fd
 * itself is left open.
 */
int set_stdfds(int fd)
{
	static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t i;

	if (fd < 0)
		return -1;

	for (i = 0; i < sizeof(std_fds) / sizeof(std_fds[0]); i++) {
		if (dup2(fd, std_fds[i]) < 0)
			return -1;
	}

	return 0;
}
1275
/* Point stdin, stdout and stderr at /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null cannot be opened.
 */
int null_stdfds(void)
{
	int nullfd, rc;

	nullfd = open_devnull();
	if (nullfd < 0)
		return -1;

	rc = set_stdfds(nullfd);
	/* The standard descriptors are duplicates; the original is not needed. */
	close(nullfd);

	return rc;
}
ccb4cabe 1289
330ae3d3 1290/* Check whether a signal is blocked by a process. */
de3c491b 1291/* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
40464e8a 1292#define __PROC_STATUS_LEN (6 + INTTYPE_TO_STRLEN(pid_t) + 7 + 1)
573ad77f 1293bool task_blocks_signal(pid_t pid, int signal)
330ae3d3 1294{
330ae3d3 1295 int ret;
de3c491b 1296 char status[__PROC_STATUS_LEN];
eabf1ea9 1297 FILE *f;
573ad77f 1298 uint64_t sigblk = 0, one = 1;
eabf1ea9
CB
1299 size_t n = 0;
1300 bool bret = false;
1301 char *line = NULL;
330ae3d3 1302
de3c491b
CB
1303 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1304 if (ret < 0 || ret >= __PROC_STATUS_LEN)
330ae3d3
CB
1305 return bret;
1306
1307 f = fopen(status, "r");
1308 if (!f)
1309 return bret;
1310
1311 while (getline(&line, &n, f) != -1) {
573ad77f
CB
1312 char *numstr;
1313
eabf1ea9 1314 if (strncmp(line, "SigBlk:", 7))
6fbcbe3b
CB
1315 continue;
1316
573ad77f
CB
1317 numstr = lxc_trim_whitespace_in_place(line + 7);
1318 ret = lxc_safe_uint64(numstr, &sigblk, 16);
1319 if (ret < 0)
6fbcbe3b 1320 goto out;
573ad77f
CB
1321
1322 break;
330ae3d3
CB
1323 }
1324
573ad77f 1325 if (sigblk & (one << (signal - 1)))
330ae3d3
CB
1326 bret = true;
1327
1328out:
1329 free(line);
1330 fclose(f);
1331 return bret;
1332}
000dfda7 1333
a687256f
CB
/* Open a namespace file of process @pid.
 *
 * @pid : process whose namespace entry is opened
 * @ns  : name of the entry below /proc/<pid>/ns; when NULL or empty the
 *        /proc/<pid>/ns directory itself is opened, which lets callers probe
 *        whether the kernel supports namespaces at all
 *
 * Returns the file descriptor from open() (O_RDONLY | O_CLOEXEC), or -1 with
 * errno set to EFBIG when the path does not fit into the internal buffer.
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char path[__NS_PATH_LEN];
	const char *sep = "";
	const char *name = "";
	int len;

	if (ns && ns[0] != '\0') {
		sep = "/";
		name = ns;
	}

	len = snprintf(path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid, sep, name);
	if (len < 0 || (size_t)len >= __NS_PATH_LEN) {
		errno = EFBIG;
		return -1;
	}

	return open(path, O_RDONLY | O_CLOEXEC);
}
6bc2eafe 1355
464c4611 1356bool lxc_switch_uid_gid(uid_t uid, gid_t gid)
dbaf55a3 1357{
db2d1af1
CB
1358 int ret = 0;
1359
1360 if (gid != LXC_INVALID_GID) {
1361 ret = setgid(gid);
1362 if (ret < 0) {
1363 SYSERROR("Failed to switch to gid %d", gid);
464c4611 1364 return false;
db2d1af1
CB
1365 }
1366 NOTICE("Switched to gid %d", gid);
dbaf55a3 1367 }
dbaf55a3 1368
db2d1af1
CB
1369 if (uid != LXC_INVALID_UID) {
1370 ret = setuid(uid);
1371 if (ret < 0) {
1372 SYSERROR("Failed to switch to uid %d", uid);
464c4611 1373 return false;
db2d1af1
CB
1374 }
1375 NOTICE("Switched to uid %d", uid);
dbaf55a3 1376 }
dbaf55a3 1377
464c4611 1378 return true;
dbaf55a3
CB
1379}
1380
/* Simple convenience wrapper around setgroups() which enables uniform
 * logging.
 *
 * Returns true if setgroups(@size, @list) succeeded, false otherwise.
 */
bool lxc_setgroups(int size, gid_t list[])
{
	int rc;

	rc = setgroups(size, list);
	if (rc >= 0) {
		NOTICE("Dropped additional groups");
		return true;
	}

	SYSERROR("Failed to setgroups()");
	return false;
}
c6868a1f
CB
1392
1393static int lxc_get_unused_loop_dev_legacy(char *loop_name)
1394{
1395 struct dirent *dp;
1396 struct loop_info64 lo64;
1397 DIR *dir;
1398 int dfd = -1, fd = -1, ret = -1;
1399
1400 dir = opendir("/dev");
1401 if (!dir)
1402 return -1;
1403
1404 while ((dp = readdir(dir))) {
c6868a1f
CB
1405 if (strncmp(dp->d_name, "loop", 4) != 0)
1406 continue;
1407
1408 dfd = dirfd(dir);
1409 if (dfd < 0)
1410 continue;
1411
1412 fd = openat(dfd, dp->d_name, O_RDWR);
1413 if (fd < 0)
1414 continue;
1415
1416 ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
1417 if (ret < 0) {
1418 if (ioctl(fd, LOOP_GET_STATUS64, &lo64) == 0 ||
1419 errno != ENXIO) {
1420 close(fd);
1421 fd = -1;
1422 continue;
1423 }
1424 }
1425
1426 ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
1427 if (ret < 0 || ret >= LO_NAME_SIZE) {
1428 close(fd);
1429 fd = -1;
1430 continue;
1431 }
1432
1433 break;
1434 }
1435
1436 closedir(dir);
1437
1438 if (fd < 0)
1439 return -1;
1440
1441 return fd;
1442}
1443
1444static int lxc_get_unused_loop_dev(char *name_loop)
1445{
1446 int loop_nr, ret;
1447 int fd_ctl = -1, fd_tmp = -1;
1448
1449 fd_ctl = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
1450 if (fd_ctl < 0)
1451 return -ENODEV;
1452
1453 loop_nr = ioctl(fd_ctl, LOOP_CTL_GET_FREE);
1454 if (loop_nr < 0)
1455 goto on_error;
1456
1457 ret = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", loop_nr);
1458 if (ret < 0 || ret >= LO_NAME_SIZE)
1459 goto on_error;
1460
1461 fd_tmp = open(name_loop, O_RDWR | O_CLOEXEC);
1462 if (fd_tmp < 0)
1463 goto on_error;
1464
1465on_error:
1466 close(fd_ctl);
1467 return fd_tmp;
1468}
1469
1470int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
1471{
1472 int ret;
1473 struct loop_info64 lo64;
1474 int fd_img = -1, fret = -1, fd_loop = -1;
1475
1476 fd_loop = lxc_get_unused_loop_dev(loop_dev);
1477 if (fd_loop < 0) {
1478 if (fd_loop == -ENODEV)
1479 fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev);
1480 else
1481 goto on_error;
1482 }
1483
1484 fd_img = open(source, O_RDWR | O_CLOEXEC);
1485 if (fd_img < 0)
1486 goto on_error;
1487
1488 ret = ioctl(fd_loop, LOOP_SET_FD, fd_img);
1489 if (ret < 0)
1490 goto on_error;
1491
1492 memset(&lo64, 0, sizeof(lo64));
1493 lo64.lo_flags = flags;
1494
1495 ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64);
1496 if (ret < 0)
1497 goto on_error;
1498
1499 fret = 0;
1500
1501on_error:
1502 if (fd_img >= 0)
1503 close(fd_img);
1504
1505 if (fret < 0 && fd_loop >= 0) {
1506 close(fd_loop);
1507 fd_loop = -1;
1508 }
1509
1510 return fd_loop;
1511}
74251e49
CB
1512
/* Unmount everything that is stacked on @path.
 *
 * umount2() is called repeatedly until it fails. A failure with EINVAL ends
 * the loop normally; any other error is treated as fatal so that we cannot
 * spin forever. (The alternative would be to keep parsing
 * /proc/self/mountinfo, which is ugly and probably racy.)
 *
 * @path : mountpoint to unstack
 * @lazy : use MNT_DETACH for every umount when true
 *
 * Returns the number of successful umounts (saturating at INT_MAX), or
 * -errno for an umount2() failure other than EINVAL.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	const int umount_flags = lazy ? MNT_DETACH : 0;
	int popped = 0;

	for (;;) {
		if (umount2(path, umount_flags) < 0) {
			if (errno != EINVAL)
				return -errno;

			break;
		}

		/* One layer is gone; stop counting rather than overflow, then
		 * check for another mount stacked underneath.
		 */
		if (popped < INT_MAX)
			popped++;
	}

	return popped;
}
ea3a694f
CB
1544
/* Run @child_fn in a child process and capture its output.
 *
 * The child's stdout and stderr are redirected into a pipe whose read end is
 * consumed by the parent.
 *
 * @buf      : optional buffer receiving the child's output; may be NULL
 * @buf_size : size of @buf in bytes
 * @child_fn : function executed in the child; it is expected not to return
 *             (the child logs an error and exits with EXIT_FAILURE if it does)
 * @args     : opaque argument handed to @child_fn
 *
 * Returns -1 if the pipe or the child process cannot be created, otherwise
 * the value returned by wait_for_pid() for the child.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	pid_t child;
	int ret, fret, pipefd[2];
	ssize_t bytes;

	/* Make sure our callers do not receive uninitialized memory. */
	if (buf_size > 0 && buf)
		buf[0] = '\0';

	if (pipe(pipefd) < 0) {
		SYSERROR("failed to create pipe");
		return -1;
	}

	/* A return value of 0 is handled as "we are the child" below. */
	child = lxc_raw_clone(0);
	if (child < 0) {
		close(pipefd[0]);
		close(pipefd[1]);
		SYSERROR("failed to create new process");
		return -1;
	}

	if (child == 0) {
		/* Close the read-end of the pipe. */
		close(pipefd[0]);

		/* Redirect std{err,out} to write-end of the
		 * pipe.
		 */
		ret = dup2(pipefd[1], STDOUT_FILENO);
		if (ret >= 0)
			ret = dup2(pipefd[1], STDERR_FILENO);

		/* Close the write-end of the pipe. */
		close(pipefd[1]);

		if (ret < 0) {
			SYSERROR("failed to duplicate std{err,out} file descriptor");
			_exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("failed to exec command");
		_exit(EXIT_FAILURE);
	}

	/* Close the write-end of the pipe in the parent. */
	close(pipefd[1]);

	/* A single read of at most buf_size - 1 bytes. When anything was
	 * read, the LAST byte read is overwritten with the terminator.
	 * NOTE(review): presumably this strips a trailing newline - confirm
	 * against callers.
	 */
	if (buf && buf_size > 0) {
		bytes = lxc_read_nointr(pipefd[0], buf, buf_size - 1);
		if (bytes > 0)
			buf[bytes - 1] = '\0';
	}

	fret = wait_for_pid(child);
	/* Close the read-end of the pipe. */
	close(pipefd[0]);

	return fret;
}
04ad7ffe 1608
d75c14e2
CB
/* Check whether the network interface @nic exists.
 *
 * @nic : interface name; the special name "none" is always reported as
 *        existing
 *
 * Returns true if @nic is "none" or stat() succeeds on /sys/class/net/<nic>.
 * Returns false if the path does not fit into the buffer or stat() fails.
 */
bool lxc_nic_exists(char *nic)
{
/* 15 "/sys/class/net/" + IFNAMSIZ + 1 \0; parenthesized so the macro expands
 * safely inside larger expressions.
 */
#define __LXC_SYS_CLASS_NET_LEN (15 + IFNAMSIZ + 1)
	char path[__LXC_SYS_CLASS_NET_LEN];
	int ret;
	struct stat sb;

	if (!strcmp(nic, "none"))
		return true;

	ret = snprintf(path, sizeof(path), "/sys/class/net/%s", nic);
	if (ret < 0 || (size_t)ret >= sizeof(path))
		return false;

	ret = stat(path, &sb);
	if (ret < 0)
		return false;

	return true;
}
127c6e70 1629
6222c3f4
CB
/* Round @n up to the nearest power of two.
 *
 * A value that already is a power of two is returned unchanged. 0 is not
 * valid input and yields 0, which is not a valid power of two. Values above
 * 2^63 also yield 0 because the result does not fit into 64 bits.
 */
uint64_t lxc_find_next_power2(uint64_t n)
{
	if (n == 0)
		return 0;

	/* Copy the highest set bit of (n - 1) into every lower position, then
	 * add one to reach the next power of two.
	 */
	n--;
	n |= n >> 1;
	n |= n >> 2;
	n |= n >> 4;
	n |= n >> 8;
	n |= n >> 16;
	n |= n >> 32;

	return n + 1;
}
1fd0f41e
CB
1647
/* Ask the kernel to deliver @signal to the caller when its parent dies.
 *
 * After setting PR_SET_PDEATHSIG the parent pid is re-read. If it is 1 the
 * caller is treated as orphaned and sends SIGKILL to itself.
 *
 * Returns 0 on success and -1 if prctl() or kill() fails.
 */
int lxc_set_death_signal(int signal)
{
	int rc;

	rc = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0),
		   prctl_arg(0), prctl_arg(0));

	/* Check whether we have been orphaned. In that case the outcome of
	 * kill() alone decides the return value.
	 */
	if ((pid_t)syscall(SYS_getppid) == 1)
		return kill(lxc_raw_getpid(), SIGKILL) < 0 ? -1 : 0;

	if (rc >= 0)
		return 0;

	SYSERROR("Failed to set PR_SET_PDEATHSIG to %d", signal);
	return -1;
}
7ad37670 1674
a9d4ebc1
CB
/* Set or clear the close-on-exec flag of @fd.
 *
 * @fd      : file descriptor to modify
 * @cloexec : true to set FD_CLOEXEC, false to clear it
 *
 * Returns 0 on success (including when the flag already has the requested
 * state) and -errno if fcntl() fails.
 */
int fd_cloexec(int fd, bool cloexec)
{
	int cur, want;

	cur = fcntl(fd, F_GETFD, 0);
	if (cur < 0)
		return -errno;

	want = cloexec ? (cur | FD_CLOEXEC) : (cur & ~FD_CLOEXEC);

	/* Skip the second fcntl() when nothing would change. */
	if (want != cur && fcntl(fd, F_SETFD, want) < 0)
		return -errno;

	return 0;
}
d7ab0375 1696
1697int recursive_destroy(char *dirname)
1698{
1699 int ret;
1700 struct dirent *direntp;
1701 DIR *dir;
1702 int r = 0;
1703
1704 dir = opendir(dirname);
1705 if (!dir)
1706 return -1;
1707
1708 while ((direntp = readdir(dir))) {
1709 char *pathname;
1710 struct stat mystat;
1711
1712 if (!strcmp(direntp->d_name, ".") ||
1713 !strcmp(direntp->d_name, ".."))
1714 continue;
1715
1716 pathname = must_make_path(dirname, direntp->d_name, NULL);
1717
1718 ret = lstat(pathname, &mystat);
1719 if (ret < 0) {
1720 if (!r)
1721 WARN("Failed to stat \"%s\"", pathname);
1722
1723 r = -1;
1724 goto next;
1725 }
1726
1727 if (!S_ISDIR(mystat.st_mode))
1728 goto next;
1729
1730 ret = recursive_destroy(pathname);
1731 if (ret < 0)
1732 r = -1;
1733
1734 next:
1735 free(pathname);
1736 }
1737
1738 ret = rmdir(dirname);
1739 if (ret < 0) {
1740 if (!r)
1741 SYSWARN("Failed to delete \"%s\"", dirname);
1742
1743 r = -1;
1744 }
1745
1746 ret = closedir(dir);
1747 if (ret < 0) {
1748 if (!r)
1749 SYSWARN("Failed to delete \"%s\"", dirname);
1750
1751 r = -1;
1752 }
1753
1754 return r;
1755}