]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/utils.c
utils: split into {file,string}_utils.{c,h}
[mirror_lxc.git] / src / lxc / utils.c
CommitLineData
e3642c43
DL
1/*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
9afe19d6 7 * Daniel Lezcano <daniel.lezcano at free.fr>
e3642c43
DL
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
250b1eec 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e3642c43
DL
22 */
23
052616eb
ÇO
24#include "config.h"
25
7935833c 26#define __STDC_FORMAT_MACROS /* Required for PRIu64 to work. */
643c1984 27#include <ctype.h>
a1e5280d 28#include <dirent.h>
e3642c43 29#include <errno.h>
a1e5280d 30#include <fcntl.h>
dbaf55a3 31#include <grp.h>
7935833c 32#include <inttypes.h>
a1e5280d 33#include <libgen.h>
b467714b 34#include <pthread.h>
d983b93c 35#include <stddef.h>
a1e5280d
CB
36#include <stdio.h>
37#include <stdlib.h>
61a1d519 38#include <string.h>
981f6029 39#include <unistd.h>
e3642c43 40#include <sys/mman.h>
6e4bb2e0 41#include <sys/mount.h>
a1e5280d
CB
42#include <sys/param.h>
43#include <sys/prctl.h>
44#include <sys/stat.h>
9be53773
SH
45#include <sys/types.h>
46#include <sys/wait.h>
e3642c43
DL
47
48#include "log.h"
025ed0f3 49#include "lxclock.h"
51d0854c 50#include "namespace.h"
e3db0162 51#include "parse.h"
981f6029 52#include "utils.h"
e3642c43 53
43f984ea
DJ
54#ifndef HAVE_STRLCPY
55#include "include/strlcpy.h"
56#endif
57
bd583214
DJ
58#ifndef HAVE_STRLCAT
59#include "include/strlcat.h"
60#endif
61
4928c718
SG
62#ifndef O_PATH
63#define O_PATH 010000000
64#endif
65
66#ifndef O_NOFOLLOW
67#define O_NOFOLLOW 00400000
68#endif
69
ac2cecc4 70lxc_log_define(utils, lxc);
e3642c43 71
4295c5de
SH
72/*
73 * if path is btrfs, tries to remove it and any subvolumes beneath it
74 */
75extern bool btrfs_try_remove_subvol(const char *path);
76
/*
 * Recursively delete everything below @dirname, then @dirname itself.
 *
 * @dirname: directory to remove
 * @pdev:    device number each entry's st_dev is compared against when
 *           @onedev is set
 * @exclude: optional entry name that, at @level 0 only, is removed with a
 *           plain rmdir()/unlink() instead of recursively; if it is a
 *           non-empty directory it is left in place
 * @level:   recursion depth, 0 for the outermost call
 * @onedev:  if true, entries whose st_dev differs from @pdev are not
 *           descended into; only a btrfs subvolume removal is attempted
 *
 * Returns 0 on success, -1 if any step failed. Processing continues after a
 * failure so that as much as possible is removed.
 */
static int _recursive_rmdir(const char *dirname, dev_t pdev,
			    const char *exclude, int level, bool onedev)
{
	char entry_path[MAXPATHLEN];
	struct dirent *ent;
	DIR *stream;
	bool kept_exclude = false;
	int failed = 0;

	stream = opendir(dirname);
	if (!stream) {
		ERROR("failed to open %s", dirname);
		return -1;
	}

	for (ent = readdir(stream); ent; ent = readdir(stream)) {
		struct stat st;
		int len;

		if (strcmp(ent->d_name, ".") == 0 ||
		    strcmp(ent->d_name, "..") == 0)
			continue;

		len = snprintf(entry_path, MAXPATHLEN, "%s/%s", dirname, ent->d_name);
		if (len < 0 || len >= MAXPATHLEN) {
			ERROR("pathname too long");
			failed = 1;
			continue;
		}

		/* The excluded top-level entry is never descended into. */
		if (level == 0 && exclude && strcmp(ent->d_name, exclude) == 0) {
			if (rmdir(entry_path) < 0) {
				if (errno == ENOTEMPTY) {
					INFO("Not deleting snapshot %s", entry_path);
					kept_exclude = true;
				} else if (errno == ENOTDIR) {
					if (unlink(entry_path))
						INFO("Failed to remove %s", entry_path);
				} else {
					SYSERROR("Failed to rmdir %s", entry_path);
					failed = 1;
				}
			}
			continue;
		}

		if (lstat(entry_path, &st)) {
			ERROR("Failed to stat %s", entry_path);
			failed = 1;
			continue;
		}

		if (onedev && st.st_dev != pdev) {
			/* TODO should we be checking /proc/self/mountinfo for
			 * pathname and not doing this if found? */
			if (btrfs_try_remove_subvol(entry_path))
				INFO("Removed btrfs subvolume at %s\n", entry_path);
			continue;
		}

		if (!S_ISDIR(st.st_mode)) {
			if (unlink(entry_path) < 0) {
				SYSERROR("Failed to delete %s", entry_path);
				failed = 1;
			}
			continue;
		}

		if (_recursive_rmdir(entry_path, pdev, exclude, level + 1, onedev) < 0)
			failed = 1;
	}

	/* A directory that still holds the kept exclude entry may legitimately
	 * fail to be removed. */
	if (rmdir(dirname) < 0 && !btrfs_try_remove_subvol(dirname) && !kept_exclude) {
		ERROR("Failed to delete %s", dirname);
		failed = 1;
	}

	if (closedir(stream)) {
		ERROR("Failed to close directory %s", dirname);
		failed = 1;
	}

	return failed ? -1 : 0;
}
168
29a11a7f
CB
169/* In overlayfs, st_dev is unreliable. So on overlayfs we don't do the
170 * lxc_rmdir_onedev()
0cc417b2
SH
171 */
172static bool is_native_overlayfs(const char *path)
173{
29a11a7f
CB
174 if (has_fs_type(path, OVERLAY_SUPER_MAGIC) ||
175 has_fs_type(path, OVERLAYFS_SUPER_MAGIC))
0cc417b2 176 return true;
29a11a7f 177
0cc417b2
SH
178 return false;
179}
180
/*
 * Recursively remove @path, optionally sparing the top-level entry named
 * @exclude. Unless @path is on overlayfs, entries on a different device than
 * @path itself are not descended into.
 *
 * Returns 0 on success (including when @path does not exist), -1 if there
 * were any failures.
 */
extern int lxc_rmdir_onedev(const char *path, const char *exclude)
{
	struct stat st;
	bool onedev = !is_native_overlayfs(path);

	if (lstat(path, &st) == 0)
		return _recursive_rmdir(path, st.st_dev, exclude, 0, onedev);

	/* Nothing to remove. */
	if (errno == ENOENT)
		return 0;

	ERROR("Failed to stat %s", path);
	return -1;
}
200
/*
 * Parse @arg as an unsigned 16-bit integer in @base (borrowed from iproute2).
 *
 * @val:  receives the parsed value on success; untouched on failure
 * @arg:  string to parse; must consist of a number only (no trailing text)
 * @base: numeric base as understood by strtoul(), 0 for auto-detection
 *
 * Returns 0 on success, -1 if @arg is NULL, empty, negative, not a number,
 * followed by trailing characters, or larger than 0xFFFF.
 */
extern int get_u16(unsigned short *val, const char *arg, int base)
{
	const char *p;
	unsigned long res;
	char *end;

	if (!arg || !*arg)
		return -1;

	/* strtoul() accepts a leading '-' and negates the result, so a large
	 * enough negative number wraps around into the valid range. Reject
	 * negative input up front.
	 */
	for (p = arg; isspace((unsigned char)*p); p++)
		;
	if (*p == '-')
		return -1;

	errno = 0;
	res = strtoul(arg, &end, base);
	if (end == arg || *end || res > 0xFFFF || errno != 0)
		return -1;

	*val = (unsigned short)res;

	return 0;
}
219
/*
 * Create @dir and every missing leading directory, each with @mode.
 *
 * The path is walked one component at a time; on every step the prefix up to
 * the current component is created. Already existing directories (EEXIST) are
 * not treated as an error.
 *
 * Returns 0 on success, -1 if a directory could not be created or memory for
 * the temporary prefix could not be allocated.
 */
extern int mkdir_p(const char *dir, mode_t mode)
{
	const char *tmp = dir;
	const char *orig = dir;
	char *makeme;

	do {
		/* dir: start of the next component, tmp: its end. */
		dir = tmp + strspn(tmp, "/");
		tmp = dir + strcspn(dir, "/");

		makeme = strndup(orig, dir - orig);
		if (!makeme) {
			SYSERROR("failed to allocate memory for directory name");
			return -1;
		}

		/* The prefix is empty on the first step of a relative path. */
		if (*makeme) {
			if (mkdir(makeme, mode) && errno != EEXIST) {
				SYSERROR("failed to create directory '%s'", makeme);
				free(makeme);
				return -1;
			}
		}
		free(makeme);
	} while (tmp != dir);

	return 0;
}
2a59a681 243
44b9ae4b 244char *get_rundir()
9e60f51d 245{
97a696c6
SG
246 char *rundir;
247 const char *homedir;
9650c735 248 struct stat sb;
9e60f51d 249
b14fc100 250 if (stat(RUNTIME_PATH, &sb) < 0)
9650c735 251 return NULL;
9650c735
TA
252
253 if (geteuid() == sb.st_uid || getegid() == sb.st_gid) {
c580b8d2 254 rundir = strdup(RUNTIME_PATH);
d6470e71
SG
255 return rundir;
256 }
97a696c6
SG
257
258 rundir = getenv("XDG_RUNTIME_DIR");
44b9ae4b
SG
259 if (rundir) {
260 rundir = strdup(rundir);
261 return rundir;
262 }
97a696c6 263
44b9ae4b
SG
264 INFO("XDG_RUNTIME_DIR isn't set in the environment.");
265 homedir = getenv("HOME");
266 if (!homedir) {
267 ERROR("HOME isn't set in the environment.");
268 return NULL;
97a696c6
SG
269 }
270
44b9ae4b 271 rundir = malloc(sizeof(char) * (17 + strlen(homedir)));
b14fc100 272 if (!rundir)
273 return NULL;
274
44b9ae4b
SG
275 sprintf(rundir, "%s/.cache/lxc/run/", homedir);
276
9e60f51d
DE
277 return rundir;
278}
279
9be53773
SH
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted
 * by a signal.
 *
 * Returns 0 only if the child exited normally with status 0; -1 if waitpid()
 * failed or the child exited abnormally or with a non-zero status.
 */
int wait_for_pid(pid_t pid)
{
	int status;
	pid_t reaped;

	for (;;) {
		reaped = waitpid(pid, &status, 0);
		if (reaped == -1) {
			if (errno == EINTR)
				continue;

			return -1;
		}

		if (reaped == pid)
			break;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0)
		return 0;

	return -1;
}
c797a220
CS
301
/*
 * Wait for child @pid to terminate, retrying when waitpid() is interrupted
 * by a signal.
 *
 * Returns the raw wait status as filled in by waitpid() (to be decoded with
 * the W*() macros), or -1 if waitpid() failed.
 */
int lxc_wait_for_pid_status(pid_t pid)
{
	int status;
	pid_t reaped;

	do {
		reaped = waitpid(pid, &status, 0);
		if (reaped == -1 && errno != EINTR)
			return -1;
	} while (reaped != pid);

	return status;
}
92f023dc 320
3ce74686
SH
321#if HAVE_LIBGNUTLS
322#include <gnutls/gnutls.h>
323#include <gnutls/crypto.h>
41246cee
DE
324
/* Constructor: calls gnutls_global_init() when this object is loaded. */
static void __attribute__((constructor)) gnutls_lxc_init(void)
{
	gnutls_global_init();
}
330
3ce74686
SH
331int sha1sum_file(char *fnam, unsigned char *digest)
332{
333 char *buf;
334 int ret;
335 FILE *f;
336 long flen;
337
338 if (!fnam)
339 return -1;
b14fc100 340
025ed0f3 341 f = fopen_cloexec(fnam, "r");
7be677a8 342 if (!f) {
3ce74686
SH
343 SYSERROR("Error opening template");
344 return -1;
345 }
b14fc100 346
3ce74686
SH
347 if (fseek(f, 0, SEEK_END) < 0) {
348 SYSERROR("Error seeking to end of template");
dd1d77f9 349 fclose(f);
3ce74686
SH
350 return -1;
351 }
b14fc100 352
3ce74686
SH
353 if ((flen = ftell(f)) < 0) {
354 SYSERROR("Error telling size of template");
dd1d77f9 355 fclose(f);
3ce74686
SH
356 return -1;
357 }
b14fc100 358
3ce74686
SH
359 if (fseek(f, 0, SEEK_SET) < 0) {
360 SYSERROR("Error seeking to start of template");
dd1d77f9 361 fclose(f);
3ce74686
SH
362 return -1;
363 }
b14fc100 364
3ce74686
SH
365 if ((buf = malloc(flen+1)) == NULL) {
366 SYSERROR("Out of memory");
dd1d77f9 367 fclose(f);
3ce74686
SH
368 return -1;
369 }
b14fc100 370
3ce74686
SH
371 if (fread(buf, 1, flen, f) != flen) {
372 SYSERROR("Failure reading template");
373 free(buf);
dd1d77f9 374 fclose(f);
3ce74686
SH
375 return -1;
376 }
b14fc100 377
dd1d77f9 378 if (fclose(f) < 0) {
3ce74686
SH
379 SYSERROR("Failre closing template");
380 free(buf);
381 return -1;
382 }
b14fc100 383
3ce74686
SH
384 buf[flen] = '\0';
385 ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest);
386 free(buf);
387 return ret;
388}
389#endif
61a1d519 390
8bd8018e 391struct lxc_popen_FILE *lxc_popen(const char *command)
ebec9176 392{
3f323207 393 int ret;
ebec9176
AM
394 int pipe_fds[2];
395 pid_t child_pid;
8bd8018e 396 struct lxc_popen_FILE *fp = NULL;
ebec9176 397
8bd8018e
CB
398 ret = pipe2(pipe_fds, O_CLOEXEC);
399 if (ret < 0)
ebec9176 400 return NULL;
ebec9176
AM
401
402 child_pid = fork();
8bd8018e
CB
403 if (child_pid < 0)
404 goto on_error;
405
406 if (!child_pid) {
407 sigset_t mask;
408
409 close(pipe_fds[0]);
410
411 /* duplicate stdout */
412 if (pipe_fds[1] != STDOUT_FILENO)
413 ret = dup2(pipe_fds[1], STDOUT_FILENO);
414 else
415 ret = fcntl(pipe_fds[1], F_SETFD, 0);
416 if (ret < 0) {
417 close(pipe_fds[1]);
03f618af 418 _exit(EXIT_FAILURE);
3f323207
CB
419 }
420
8bd8018e
CB
421 /* duplicate stderr */
422 if (pipe_fds[1] != STDERR_FILENO)
423 ret = dup2(pipe_fds[1], STDERR_FILENO);
424 else
425 ret = fcntl(pipe_fds[1], F_SETFD, 0);
426 close(pipe_fds[1]);
427 if (ret < 0)
03f618af 428 _exit(EXIT_FAILURE);
8bd8018e
CB
429
430 /* unblock all signals */
431 ret = sigfillset(&mask);
432 if (ret < 0)
03f618af 433 _exit(EXIT_FAILURE);
8bd8018e 434
b467714b 435 ret = pthread_sigmask(SIG_UNBLOCK, &mask, NULL);
8bd8018e 436 if (ret < 0)
03f618af 437 _exit(EXIT_FAILURE);
8bd8018e
CB
438
439 execl("/bin/sh", "sh", "-c", command, (char *)NULL);
03f618af 440 _exit(127);
ebec9176
AM
441 }
442
8bd8018e
CB
443 close(pipe_fds[1]);
444 pipe_fds[1] = -1;
ebec9176 445
8bd8018e
CB
446 fp = malloc(sizeof(*fp));
447 if (!fp)
448 goto on_error;
b14fc100 449
7e50ec0b 450 memset(fp, 0, sizeof(*fp));
ebec9176
AM
451
452 fp->child_pid = child_pid;
8bd8018e 453 fp->pipe = pipe_fds[0];
ebec9176 454
7e50ec0b
CB
455 /* From now on, closing fp->f will also close fp->pipe. So only ever
456 * call fclose(fp->f).
457 */
8bd8018e
CB
458 fp->f = fdopen(pipe_fds[0], "r");
459 if (!fp->f)
460 goto on_error;
ebec9176 461
8bd8018e 462 return fp;
ebec9176 463
8bd8018e 464on_error:
7e50ec0b
CB
465 /* We can only close pipe_fds[0] if fdopen() didn't succeed or wasn't
466 * called yet. Otherwise the fd belongs to the file opened by fdopen()
467 * since it isn't dup()ed.
468 */
469 if (fp && !fp->f && pipe_fds[0] >= 0)
8bd8018e
CB
470 close(pipe_fds[0]);
471
472 if (pipe_fds[1] >= 0)
473 close(pipe_fds[1]);
ebec9176 474
7e50ec0b
CB
475 if (fp && fp->f)
476 fclose(fp->f);
477
478 if (fp)
479 free(fp);
480
ebec9176
AM
481 return NULL;
482}
483
8bd8018e 484int lxc_pclose(struct lxc_popen_FILE *fp)
ebec9176 485{
ebec9176 486 pid_t wait_pid;
8bd8018e 487 int wstatus = 0;
ebec9176 488
8bd8018e 489 if (!fp)
ebec9176 490 return -1;
ebec9176
AM
491
492 do {
8bd8018e
CB
493 wait_pid = waitpid(fp->child_pid, &wstatus, 0);
494 } while (wait_pid < 0 && errno == EINTR);
ebec9176 495
8bd8018e
CB
496 fclose(fp->f);
497 free(fp);
498
499 if (wait_pid < 0)
ebec9176 500 return -1;
ebec9176
AM
501
502 return wstatus;
503}
504
508c263e
SH
/*
 * Produce a seed for rand(), preferably read from /dev/urandom.
 *
 * The seed starts out as time + pid and is overwritten with bytes from
 * /dev/urandom when that can be read. If @srand_it is true the seed is also
 * handed to srand().
 *
 * Returns the seed.
 */
int randseed(bool srand_it)
{
	unsigned int seed = time(NULL) + getpid();
	FILE *urandom = fopen("/dev/urandom", "r");

	if (urandom) {
		if (fread(&seed, sizeof(seed), 1, urandom) != 1)
			SYSDEBUG("unable to fread /dev/urandom, fallback to time+pid rand seed");

		fclose(urandom);
	}

	if (srand_it)
		srand(seed);

	return seed;
}
5d897655
SH
527
/*
 * Translate host uid @orig into the uid it maps to in the current user
 * namespace, using the "<nsid> <hostid> <range>" lines of
 * /proc/self/uid_map.
 *
 * Returns the mapped uid, or 0 if the map cannot be opened or no line
 * covers @orig.
 */
uid_t get_ns_uid(uid_t orig)
{
	uid_t nsid, hostid, range;
	uid_t mapped = 0;
	char *line = NULL;
	size_t cap = 0;
	FILE *map;

	map = fopen("/proc/self/uid_map", "r");
	if (!map)
		return 0;

	while (getline(&line, &cap, map) != -1) {
		if (sscanf(line, "%u %u %u", &nsid, &hostid, &range) != 3)
			continue;

		/* Skip ranges that do not contain @orig. */
		if (orig < hostid || hostid + range <= orig)
			continue;

		mapped = nsid + (orig - hostid);
		break;
	}

	fclose(map);
	free(line);
	return mapped;
}
c476bdce
SH
554
/*
 * Returns true if @path can be stat()ed and is a directory.
 *
 * Any stat() failure, whatever its cause, is reported as "no".
 */
bool dir_exists(const char *path)
{
	struct stat sb;

	return stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode);
}
93c379f0
ÇO
567
/*
 * 64-bit FNV-1a style hash of @len bytes at @buf, continuing from @hval.
 *
 * SHA-1 is deliberately not used here so as not to depend on HAVE_GNUTLS.
 * FNV has good anti-collision properties; pre-image resistance and
 * one-way-ness are not a concern, the goal is only a name that is unique
 * within the 108 bytes of space available.
 *
 * Returns the updated hash value (@hval unchanged when @len is 0).
 */
uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
	const unsigned char *bytes = buf;
	size_t i;

	for (i = 0; i < len; i++) {
		/* xor the bottom with the current octet */
		hval ^= (uint64_t)bytes[i];

		/* multiply by the 64 bit FNV magic prime mod 2^64 */
		hval *= 0x100000001b3ULL;
	}

	return hval;
}
2c6f3fc9 591
f6310f18 592bool is_shared_mountpoint(const char *path)
2c6f3fc9 593{
f6310f18 594 char buf[LXC_LINELEN];
2c6f3fc9
SH
595 FILE *f;
596 int i;
f6310f18 597 char *p, *p2;
2c6f3fc9
SH
598
599 f = fopen("/proc/self/mountinfo", "r");
600 if (!f)
601 return 0;
b14fc100 602
eab15c1e
CB
603 while (fgets(buf, LXC_LINELEN, f)) {
604 for (p = buf, i = 0; p && i < 4; i++)
605 p = strchr(p + 1, ' ');
2c6f3fc9
SH
606 if (!p)
607 continue;
b14fc100 608
eab15c1e 609 p2 = strchr(p + 1, ' ');
2c6f3fc9
SH
610 if (!p2)
611 continue;
b14fc100 612
2c6f3fc9 613 *p2 = '\0';
f6310f18
LT
614 if (strcmp(p + 1, path) == 0) {
615 /* This is the path. Is it shared? */
eab15c1e 616 p = strchr(p2 + 1, ' ');
2c6f3fc9
SH
617 if (p && strstr(p, "shared:")) {
618 fclose(f);
f6310f18 619 return true;
2c6f3fc9
SH
620 }
621 }
622 }
b14fc100 623
2c6f3fc9 624 fclose(f);
f6310f18
LT
625 return false;
626}
627
/*
 * Detect whether / is mounted MS_SHARED, by way of /proc/self/mountinfo.
 *
 * Only / is checked. If the container rootfs or mount location is MS_SHARED
 * but / is not, this will not notice; figuring that out was judged too much
 * work to be worth it.
 *
 * Returns 1 if / is a shared mountpoint, 0 otherwise.
 */
int detect_shared_rootfs(void)
{
	return is_shared_mountpoint("/") ? 1 : 0;
}
0e6e3a41 641
37ef15bb
CB
/*
 * Move the calling process into namespace @ns of process @pid by opening
 * /proc/<pid>/ns/<ns> and passing it to setns().
 *
 * Returns true on success, false if the path is too long, cannot be opened,
 * or setns() fails.
 */
bool switch_to_ns(pid_t pid, const char *ns)
{
	char nspath[MAXPATHLEN];
	bool switched;
	int len, nsfd;

	/* Switch to new ns */
	len = snprintf(nspath, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns);
	if (len < 0 || len >= MAXPATHLEN)
		return false;

	nsfd = open(nspath, O_RDONLY);
	if (nsfd < 0) {
		SYSERROR("Failed to open %s", nspath);
		return false;
	}

	switched = (setns(nsfd, 0) == 0);
	if (!switched)
		SYSERROR("Failed to set process %d to %s of %d.", pid, ns, nsfd);

	close(nsfd);
	return switched;
}
668
b7f954bb
SH
/*
 * Detect whether / is the kernel's initial rootfs (ramfs).
 *
 * Looking at fs/proc_namespace.c, the rootfs entry can be expected to very
 * specifically contain " - rootfs rootfs ". As long as we have chrooted so
 * that rootfs is not our root, that entry should always be skipped in the
 * mountinfo contents.
 *
 * Returns true if /proc/self/mountinfo has an entry whose fifth field is "/"
 * followed by "- rootfs rootfs ", false otherwise.
 */
bool detect_ramfs_rootfs(void)
{
	bool on_ramfs = false;
	char *line = NULL;
	size_t cap = 0;
	FILE *f;

	f = fopen("/proc/self/mountinfo", "r");
	if (!f)
		return false;

	while (!on_ramfs && getline(&line, &cap, f) != -1) {
		char *field, *field_end, *sep;
		int i;

		/* Advance to the space in front of the fifth field. */
		field = line;
		for (i = 0; field && i < 4; i++)
			field = strchr(field + 1, ' ');
		if (!field)
			continue;

		field_end = strchr(field + 1, ' ');
		if (!field_end)
			continue;

		*field_end = '\0';
		if (strcmp(field + 1, "/") != 0)
			continue;

		/* This is '/'. Is it the ramfs? */
		sep = strchr(field_end + 1, '-');
		if (sep && strncmp(sep, "- rootfs rootfs ", 16) == 0)
			on_ramfs = true;
	}

	free(line);
	fclose(f);

	if (on_ramfs)
		INFO("Rootfs is located on ramfs");

	return on_ramfs;
}
715
37ef15bb
CB
716char *on_path(const char *cmd, const char *rootfs)
717{
84c5549b 718 char *entry = NULL, *path = NULL;
0e6e3a41
SG
719 char cmdpath[MAXPATHLEN];
720 int ret;
721
722 path = getenv("PATH");
723 if (!path)
8afb3e61 724 return NULL;
0e6e3a41
SG
725
726 path = strdup(path);
727 if (!path)
8afb3e61 728 return NULL;
0e6e3a41 729
37ef15bb 730 lxc_iterate_parts (entry, path, ":") {
9d9c111c 731 if (rootfs)
37ef15bb
CB
732 ret = snprintf(cmdpath, MAXPATHLEN, "%s/%s/%s", rootfs,
733 entry, cmd);
9d9c111c
SH
734 else
735 ret = snprintf(cmdpath, MAXPATHLEN, "%s/%s", entry, cmd);
0e6e3a41 736 if (ret < 0 || ret >= MAXPATHLEN)
84c5549b 737 continue;
0e6e3a41
SG
738
739 if (access(cmdpath, X_OK) == 0) {
740 free(path);
8afb3e61 741 return strdup(cmdpath);
0e6e3a41 742 }
0e6e3a41
SG
743 }
744
745 free(path);
8afb3e61 746 return NULL;
0e6e3a41 747}
76a26f55 748
12983ba4
SH
/* Returns true if file_exists() finds /proc/self/ns/cgroup. */
bool cgns_supported(void)
{
	bool have_cgns = file_exists("/proc/self/ns/cgroup");

	return have_cgns;
}
753
9d9c111c
SH
754/* historically lxc-init has been under /usr/lib/lxc and under
755 * /usr/lib/$ARCH/lxc. It now lives as $prefix/sbin/init.lxc.
756 */
757char *choose_init(const char *rootfs)
758{
759 char *retv = NULL;
370ec268
SF
760 const char *empty = "",
761 *tmp;
9d9c111c 762 int ret, env_set = 0;
9d9c111c
SH
763
764 if (!getenv("PATH")) {
765 if (setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 0))
766 SYSERROR("Failed to setenv");
b14fc100 767
9d9c111c
SH
768 env_set = 1;
769 }
770
771 retv = on_path("init.lxc", rootfs);
772
773 if (env_set) {
774 if (unsetenv("PATH"))
775 SYSERROR("Failed to unsetenv");
776 }
777
778 if (retv)
779 return retv;
780
781 retv = malloc(PATH_MAX);
782 if (!retv)
783 return NULL;
784
785 if (rootfs)
370ec268 786 tmp = rootfs;
9d9c111c 787 else
370ec268
SF
788 tmp = empty;
789
790 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, SBINDIR, "/init.lxc");
9d9c111c
SH
791 if (ret < 0 || ret >= PATH_MAX) {
792 ERROR("pathname too long");
793 goto out1;
794 }
b14fc100 795
e57cd7e9 796 if (access(retv, X_OK) == 0)
9d9c111c
SH
797 return retv;
798
370ec268 799 ret = snprintf(retv, PATH_MAX, "%s/%s/%s", tmp, LXCINITDIR, "/lxc/lxc-init");
9d9c111c
SH
800 if (ret < 0 || ret >= PATH_MAX) {
801 ERROR("pathname too long");
802 goto out1;
803 }
b14fc100 804
e57cd7e9 805 if (access(retv, X_OK) == 0)
9d9c111c
SH
806 return retv;
807
370ec268 808 ret = snprintf(retv, PATH_MAX, "%s/usr/lib/lxc/lxc-init", tmp);
9d9c111c
SH
809 if (ret < 0 || ret >= PATH_MAX) {
810 ERROR("pathname too long");
811 goto out1;
812 }
b14fc100 813
e57cd7e9 814 if (access(retv, X_OK) == 0)
9d9c111c
SH
815 return retv;
816
370ec268 817 ret = snprintf(retv, PATH_MAX, "%s/sbin/lxc-init", tmp);
9d9c111c
SH
818 if (ret < 0 || ret >= PATH_MAX) {
819 ERROR("pathname too long");
820 goto out1;
821 }
b14fc100 822
e57cd7e9 823 if (access(retv, X_OK) == 0)
9d9c111c
SH
824 return retv;
825
826 /*
827 * Last resort, look for the statically compiled init.lxc which we
828 * hopefully bind-mounted in.
829 * If we are called during container setup, and we get to this point,
830 * then the init.lxc.static from the host will need to be bind-mounted
831 * in. So we return NULL here to indicate that.
832 */
833 if (rootfs)
834 goto out1;
835
836 ret = snprintf(retv, PATH_MAX, "/init.lxc.static");
837 if (ret < 0 || ret >= PATH_MAX) {
838 WARN("Nonsense - name /lxc.init.static too long");
839 goto out1;
840 }
b14fc100 841
e57cd7e9 842 if (access(retv, X_OK) == 0)
9d9c111c
SH
843 return retv;
844
845out1:
846 free(retv);
847 return NULL;
848}
735f2c6e 849
6010a416
SG
850/*
851 * Given the '-t' template option to lxc-create, figure out what to
852 * do. If the template is a full executable path, use that. If it
853 * is something like 'sshd', then return $templatepath/lxc-sshd.
854 * On success return the template, on error return NULL.
855 */
856char *get_template_path(const char *t)
857{
858 int ret, len;
859 char *tpath;
860
861 if (t[0] == '/' && access(t, X_OK) == 0) {
862 tpath = strdup(t);
863 return tpath;
864 }
865
866 len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1;
b14fc100 867
6010a416
SG
868 tpath = malloc(len);
869 if (!tpath)
870 return NULL;
b14fc100 871
6010a416
SG
872 ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t);
873 if (ret < 0 || ret >= len) {
874 free(tpath);
875 return NULL;
876 }
b14fc100 877
6010a416
SG
878 if (access(tpath, X_OK) < 0) {
879 SYSERROR("bad template: %s", t);
880 free(tpath);
881 return NULL;
882 }
883
884 return tpath;
885}
0a4be28d 886
592fd47a
SH
/*
 * Step to the next segment of a path whose '/' were replaced with '\0'.
 *
 * @path:    a pathname where / replaced with '\0'.
 * @offsetp: pointer to int showing which path segment was last seen.
 *           Updated on return to reflect the next segment. Left untouched
 *           if it already is at or past @fulllen.
 * @fulllen: full original path length.
 *
 * Returns a pointer to the next path segment, or NULL if done.
 */
static char *get_nextpath(char *path, int *offsetp, int fulllen)
{
	int offset = *offsetp;

	if (offset >= fulllen)
		return NULL;

	/* Skip the rest of the current segment. The bound is tested before
	 * the byte is read so that path[fulllen] is never touched.
	 */
	while (offset < fulllen && path[offset] != '\0')
		offset++;

	/* Skip the separator(s) in front of the next segment. */
	while (offset < fulllen && path[offset] == '\0')
		offset++;

	*offsetp = offset;
	return (offset < fulllen) ? &path[offset] : NULL;
}
910
/*
 * Check that @subdir is a subdir of @dir. @len is the length of
 * @dir (to avoid having to recalculate it).
 *
 * @subdir counts as being under @dir when it starts with @dir and the match
 * ends on a path boundary: @dir ends in '/', or @subdir continues with '/',
 * or both strings are equal. An empty @dir (@len == 0) is treated as a
 * prefix of every path.
 */
static bool is_subdir(const char *subdir, const char *dir, size_t len)
{
	size_t subdirlen = strlen(subdir);

	/* Without this guard dir[len - 1] below would index before @dir. */
	if (len == 0)
		return true;

	if (subdirlen < len)
		return false;

	if (strncmp(subdir, dir, len) != 0)
		return false;

	if (dir[len - 1] == '/')
		return true;

	return subdir[len] == '/' || subdirlen == len;
}
933
/*
 * Check whether the open @fd refers to a symlink.
 *
 * Returns -ELOOP if it does, -ENOENT if fstat() failed, and 0 if the fd is
 * ok.
 */
static int check_symlink(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) < 0)
		return -ENOENT;

	return S_ISLNK(sb.st_mode) ? -ELOOP : 0;
}
952
/*
 * Open @nextpath relative to @dirfd, refusing to follow a symlink.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * A plain O_RDONLY | O_NOFOLLOW open is tried first. Only if that fails with
 * EPERM or EACCES is the open retried with O_PATH | O_NOFOLLOW, and the
 * resulting fd is then verified not to be a symlink.
 *
 * Returns an open fd on success. On failure returns the negative result of
 * openat(), or the negative value reported by check_symlink().
 */
static int open_if_safe(int dirfd, const char *nextpath)
{
	int newfd, verdict;

	newfd = openat(dirfd, nextpath, O_RDONLY | O_NOFOLLOW);
	if (newfd >= 0) /* Was not a symlink, all good. */
		return newfd;

	/* ELOOP and every other error except a permission problem is final. */
	if (errno != EPERM && errno != EACCES)
		return newfd;

	/* We're not root (cause we got EPERM) so try opening with O_PATH. */
	newfd = openat(dirfd, nextpath, O_PATH | O_NOFOLLOW);
	if (newfd < 0)
		return newfd;

	/* O_PATH will return an fd for symlinks. We know nextpath wasn't a
	 * symlink at last openat, so if fd is now a link, then something
	 * fishy is going on.
	 */
	verdict = check_symlink(newfd);
	if (verdict < 0) {
		close(newfd);
		return verdict;
	}

	return newfd;
}
988
/*
 * Open a path intending for mounting, ensuring that the final path
 * is inside the container's rootfs.
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @target:      path to be opened
 * @prefix_skip: a part of @target in which to ignore symbolic links. This
 *               would be the container's rootfs. NULL or "" means "/".
 *
 * Starting from @prefix_skip, every remaining component of @target is opened
 * in turn with open_if_safe(), so a symlink in any of them aborts the walk.
 *
 * Return an open fd for the path, or <0 on error.
 */
static int open_without_symlink(const char *target, const char *prefix_skip)
{
	int offset, dirfd, fulllen;
	char *copy, *cursor;

	fulllen = strlen(target);

	/* make sure prefix-skip makes sense */
	if (!prefix_skip || strlen(prefix_skip) == 0) {
		prefix_skip = "/";
		offset = 0;
	} else {
		offset = strlen(prefix_skip);
		if (!is_subdir(target, prefix_skip, offset)) {
			ERROR("WHOA there - target '%s' didn't start with prefix '%s'",
			      target, prefix_skip);
			return -EINVAL;
		}

		/*
		 * get_nextpath() expects the offset to be on a (turned into
		 * \0) / or before it, so step back by one to make sure that
		 * happens.
		 */
		if (offset)
			offset--;
	}

	/* Make a copy of target which we can hack up, and tokenize it */
	copy = strdup(target);
	if (!copy) {
		SYSERROR("Out of memory checking for symbolic link");
		return -ENOMEM;
	}

	for (cursor = copy; cursor < copy + fulllen; cursor++) {
		if (*cursor == '/')
			*cursor = '\0';
	}

	dirfd = open(prefix_skip, O_RDONLY);
	while (dirfd >= 0) {
		int nextfd, saved_errno;
		char *component;

		component = get_nextpath(copy, &offset, fulllen);
		if (!component)
			break;

		/* close() may clobber errno, so keep the one from the open. */
		nextfd = open_if_safe(dirfd, component);
		saved_errno = errno;
		close(dirfd);

		dirfd = nextfd;
		if (nextfd < 0) {
			errno = saved_errno;
			if (errno == ELOOP)
				SYSERROR("%s in %s was a symbolic link!", component, target);
		}
	}

	free(copy);
	return dirfd;
}
1070
/*
 * Safely mount a path into a container, ensuring that the mount target
 * is under the container's @rootfs. (If @rootfs is NULL, then the container
 * uses the host's /)
 *
 * CAVEAT: This function must not be used for other purposes than container
 * setup before executing the container's init
 *
 * @src, @fstype, @flags, @data: passed on to mount()
 * @dest:   mount target; opened component by component so that no symlink
 *          below @rootfs is followed, then mounted on via /proc/self/fd/<fd>
 * @rootfs: prefix of @dest in which symlinks are not checked, or NULL
 *
 * For a bind mount with a relative @src, the source is opened the same
 * symlink-safe way and also referred to via /proc/self/fd/<fd>.
 *
 * Returns 0 on success, a negative value on failure.
 */
int safe_mount(const char *src, const char *dest, const char *fstype,
	       unsigned long flags, const void *data, const char *rootfs)
{
	int destfd, ret, saved_errno;
	/* Only needs enough for /proc/self/fd/<fd>. */
	char srcbuf[50], destbuf[50];
	int srcfd = -1;
	const char *mntsrc = src;

	if (!rootfs)
		rootfs = "";

	/* todo - allow symlinks for relative paths if 'allowsymlinks' option is passed */
	if ((flags & MS_BIND) && src && src[0] != '/') {
		INFO("this is a relative bind mount");

		srcfd = open_without_symlink(src, NULL);
		if (srcfd < 0)
			return srcfd;

		/* A return value equal to the buffer size already means the
		 * output was truncated.
		 */
		ret = snprintf(srcbuf, sizeof(srcbuf), "/proc/self/fd/%d", srcfd);
		if (ret < 0 || (size_t)ret >= sizeof(srcbuf)) {
			close(srcfd);
			ERROR("Failed to build /proc/self/fd path for mount source");
			return -EINVAL;
		}
		mntsrc = srcbuf;
	}

	destfd = open_without_symlink(dest, rootfs);
	if (destfd < 0) {
		if (srcfd != -1) {
			/* Keep the errno of the failed open for the caller. */
			saved_errno = errno;
			close(srcfd);
			errno = saved_errno;
		}

		return destfd;
	}

	ret = snprintf(destbuf, sizeof(destbuf), "/proc/self/fd/%d", destfd);
	if (ret < 0 || (size_t)ret >= sizeof(destbuf)) {
		if (srcfd != -1)
			close(srcfd);

		close(destfd);
		ERROR("Failed to build /proc/self/fd path for mount target");
		return -EINVAL;
	}

	ret = mount(mntsrc, destbuf, fstype, flags, data);
	saved_errno = errno;
	if (srcfd != -1)
		close(srcfd);

	close(destfd);
	if (ret < 0) {
		errno = saved_errno;
		SYSERROR("Failed to mount %s onto %s", src ? src : "(null)", dest);
		return ret;
	}

	return 0;
}
1143
ced03a01
SH
1144/*
1145 * Mount a proc under @rootfs if proc self points to a pid other than
1146 * my own. This is needed to have a known-good proc mount for setting
1147 * up LSMs both at container startup and attach.
1148 *
1149 * @rootfs : the rootfs where proc should be mounted
1150 *
1151 * Returns < 0 on failure, 0 if the correct proc was already mounted
1152 * and 1 if a new proc was mounted.
f267d666
BP
1153 *
1154 * NOTE: not to be called from inside the container namespace!
ced03a01 1155 */
943144d9 1156int lxc_mount_proc_if_needed(const char *rootfs)
ced03a01
SH
1157{
1158 char path[MAXPATHLEN];
6b1ba5d6
CB
1159 int link_to_pid, linklen, mypid, ret;
1160 char link[LXC_NUMSTRLEN64] = {0};
ced03a01
SH
1161
1162 ret = snprintf(path, MAXPATHLEN, "%s/proc/self", rootfs);
1163 if (ret < 0 || ret >= MAXPATHLEN) {
1164 SYSERROR("proc path name too long");
1165 return -1;
1166 }
fc2ad9dc 1167
6b1ba5d6 1168 linklen = readlink(path, link, LXC_NUMSTRLEN64);
fc2ad9dc 1169
ced03a01 1170 ret = snprintf(path, MAXPATHLEN, "%s/proc", rootfs);
d539a2b2
CB
1171 if (ret < 0 || ret >= MAXPATHLEN) {
1172 SYSERROR("proc path name too long");
1173 return -1;
1174 }
fc2ad9dc
CB
1175
1176 /* /proc not mounted */
1177 if (linklen < 0) {
1178 if (mkdir(path, 0755) && errno != EEXIST)
1179 return -1;
b14fc100 1180
ced03a01 1181 goto domount;
6b1ba5d6
CB
1182 } else if (linklen >= LXC_NUMSTRLEN64) {
1183 link[linklen - 1] = '\0';
1184 ERROR("readlink returned truncated content: \"%s\"", link);
1185 return -1;
fc2ad9dc
CB
1186 }
1187
0059379f 1188 mypid = lxc_raw_getpid();
6b1ba5d6
CB
1189 INFO("I am %d, /proc/self points to \"%s\"", mypid, link);
1190
2d036cca
CB
1191 if (lxc_safe_int(link, &link_to_pid) < 0)
1192 return -1;
fc2ad9dc 1193
6b1ba5d6
CB
1194 /* correct procfs is already mounted */
1195 if (link_to_pid == mypid)
1196 return 0;
fc2ad9dc 1197
6b1ba5d6
CB
1198 ret = umount2(path, MNT_DETACH);
1199 if (ret < 0)
1200 WARN("failed to umount \"%s\" with MNT_DETACH", path);
ced03a01
SH
1201
1202domount:
fc2ad9dc 1203 /* rootfs is NULL */
6b1ba5d6 1204 if (!strcmp(rootfs, ""))
f267d666
BP
1205 ret = mount("proc", path, "proc", 0, NULL);
1206 else
1207 ret = safe_mount("proc", path, "proc", 0, NULL, rootfs);
f267d666 1208 if (ret < 0)
ced03a01 1209 return -1;
f267d666 1210
fc2ad9dc 1211 INFO("mounted /proc in container for security transition");
ced03a01
SH
1212 return 1;
1213}
69aeabac 1214
/* Open /dev/null read-write.
 *
 * Returns the new file descriptor, or the negative value returned by
 * open() after logging the failure.
 */
int open_devnull(void)
{
	int nullfd;

	nullfd = open("/dev/null", O_RDWR);
	if (nullfd >= 0)
		return nullfd;

	SYSERROR("Can't open /dev/null");
	return nullfd;
}
69aeabac 1224
f8dd0275
AM
/* Duplicate @fd onto stdin, stdout and stderr (in that order).
 *
 * Returns 0 on success and -1 if @fd is negative or any dup2() fails.
 * Descriptors duplicated before a failure are left as they are.
 */
int set_stdfds(int fd)
{
	static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
	size_t i;

	if (fd < 0)
		return -1;

	for (i = 0; i < sizeof(std_fds) / sizeof(std_fds[0]); i++) {
		if (dup2(fd, std_fds[i]) < 0)
			return -1;
	}

	return 0;
}
1246
/* Point stdin, stdout and stderr at /dev/null.
 *
 * Returns the result of set_stdfds(), or -1 if /dev/null could not be
 * opened. The temporary descriptor is always closed again.
 */
int null_stdfds(void)
{
	int nullfd, rc;

	nullfd = open_devnull();
	if (nullfd < 0)
		return -1;

	rc = set_stdfds(nullfd);
	close(nullfd);

	return rc;
}
ccb4cabe 1260
330ae3d3 1261/* Check whether a signal is blocked by a process. */
de3c491b 1262/* /proc/pid-to-str/status\0 = (5 + 21 + 7 + 1) */
eabf1ea9 1263#define __PROC_STATUS_LEN (6 + (LXC_NUMSTRLEN64) + 7 + 1)
573ad77f 1264bool task_blocks_signal(pid_t pid, int signal)
330ae3d3 1265{
330ae3d3 1266 int ret;
de3c491b 1267 char status[__PROC_STATUS_LEN];
eabf1ea9 1268 FILE *f;
573ad77f 1269 uint64_t sigblk = 0, one = 1;
eabf1ea9
CB
1270 size_t n = 0;
1271 bool bret = false;
1272 char *line = NULL;
330ae3d3 1273
de3c491b
CB
1274 ret = snprintf(status, __PROC_STATUS_LEN, "/proc/%d/status", pid);
1275 if (ret < 0 || ret >= __PROC_STATUS_LEN)
330ae3d3
CB
1276 return bret;
1277
1278 f = fopen(status, "r");
1279 if (!f)
1280 return bret;
1281
1282 while (getline(&line, &n, f) != -1) {
573ad77f
CB
1283 char *numstr;
1284
eabf1ea9 1285 if (strncmp(line, "SigBlk:", 7))
6fbcbe3b
CB
1286 continue;
1287
573ad77f
CB
1288 numstr = lxc_trim_whitespace_in_place(line + 7);
1289 ret = lxc_safe_uint64(numstr, &sigblk, 16);
1290 if (ret < 0)
6fbcbe3b 1291 goto out;
573ad77f
CB
1292
1293 break;
330ae3d3
CB
1294 }
1295
573ad77f 1296 if (sigblk & (one << (signal - 1)))
330ae3d3
CB
1297 bret = true;
1298
1299out:
1300 free(line);
1301 fclose(f);
1302 return bret;
1303}
000dfda7 1304
a687256f
CB
/* Open a namespace file of process @pid.
 *
 * @pid : process whose namespace entry should be opened
 * @ns  : namespace name below /proc/<pid>/ns; when NULL or empty the
 *        /proc/<pid>/ns directory itself is opened, which lets callers
 *        probe whether namespaces are supported by the kernel
 *
 * Returns the descriptor from open(O_RDONLY | O_CLOEXEC), or -1 with
 * errno set to EFBIG if the path does not fit into the local buffer.
 */
int lxc_preserve_ns(const int pid, const char *ns)
{
/* 5 /proc + 21 /int_as_str + 3 /ns + 20 /NS_NAME + 1 \0 */
#define __NS_PATH_LEN 50
	char ns_path[__NS_PATH_LEN];
	const char *sep = "";
	const char *name = "";
	int len;

	/* Only append "/<ns>" when a non-empty name was supplied. */
	if (ns && ns[0] != '\0') {
		sep = "/";
		name = ns;
	}

	len = snprintf(ns_path, __NS_PATH_LEN, "/proc/%d/ns%s%s", pid, sep, name);
	if (len < 0 || (size_t)len >= __NS_PATH_LEN) {
		errno = EFBIG;
		return -1;
	}

	return open(ns_path, O_RDONLY | O_CLOEXEC);
}
6bc2eafe 1326
dbaf55a3
CB
/* Switch the calling process to @gid and then to @uid.
 *
 * The group is changed first, then the user id.
 *
 * Returns 0 on success, or the negated errno of the failing
 * setgid()/setuid() call.
 */
int lxc_switch_uid_gid(uid_t uid, gid_t gid)
{
	int saved_errno;

	if (setgid(gid) < 0) {
		/* Capture errno before logging so that nothing done on the
		 * logging path can change the value we return.
		 */
		saved_errno = errno;
		SYSERROR("Failed to switch to gid %d.", gid);
		return -saved_errno;
	}
	NOTICE("Switched to gid %d.", gid);

	if (setuid(uid) < 0) {
		saved_errno = errno;
		SYSERROR("Failed to switch to uid %d.", uid);
		return -saved_errno;
	}
	NOTICE("Switched to uid %d.", uid);

	return 0;
}
1343
/* Simple convenience function which enables uniform logging.
 *
 * Calls setgroups(@size, @list).
 *
 * Returns 0 on success, or the negated errno of the failing setgroups()
 * call.
 */
int lxc_setgroups(int size, gid_t list[])
{
	if (setgroups(size, list) < 0) {
		/* Capture errno before logging so that nothing done on the
		 * logging path can change the value we return.
		 */
		int saved_errno = errno;

		SYSERROR("Failed to setgroups().");
		return -saved_errno;
	}
	NOTICE("Dropped additional groups.");

	return 0;
}
c6868a1f
CB
1355
/* Find a free loop device by scanning /dev for entries starting with
 * "loop".
 *
 * A device counts as free only when LOOP_GET_STATUS64 fails with ENXIO.
 * If the ioctl succeeds the device already has a backing file, and any
 * other error means the entry is not a usable loop device; both cases
 * are skipped.
 *
 * @loop_name : buffer of at least LO_NAME_SIZE bytes receiving the path
 *              "/dev/<entry>" of the chosen device
 *
 * Returns an open read-write descriptor for the device, or -1 if none
 * was found.
 */
static int lxc_get_unused_loop_dev_legacy(char *loop_name)
{
	struct dirent *dp;
	struct loop_info64 lo64;
	DIR *dir;
	int dfd, ret;
	int fd = -1;

	dir = opendir("/dev");
	if (!dir)
		return -1;

	/* The directory descriptor does not change while iterating. */
	dfd = dirfd(dir);
	if (dfd < 0) {
		closedir(dir);
		return -1;
	}

	while ((dp = readdir(dir))) {
		if (strncmp(dp->d_name, "loop", 4) != 0)
			continue;

		/* O_CLOEXEC matches what lxc_get_unused_loop_dev() uses. */
		fd = openat(dfd, dp->d_name, O_RDWR | O_CLOEXEC);
		if (fd < 0)
			continue;

		ret = ioctl(fd, LOOP_GET_STATUS64, &lo64);
		if (ret == 0 || errno != ENXIO) {
			/* In use, or not a loop device we can query. */
			close(fd);
			fd = -1;
			continue;
		}

		ret = snprintf(loop_name, LO_NAME_SIZE, "/dev/%s", dp->d_name);
		if (ret < 0 || ret >= LO_NAME_SIZE) {
			close(fd);
			fd = -1;
			continue;
		}

		break;
	}

	closedir(dir);

	if (fd < 0)
		return -1;

	return fd;
}
1406
/* Ask /dev/loop-control for a free loop device and open it.
 *
 * @name_loop : buffer of at least LO_NAME_SIZE bytes receiving the path
 *              "/dev/loop<N>" of the device
 *
 * Returns an open descriptor for the loop device, -ENODEV if
 * /dev/loop-control could not be opened, and -1 for any later failure.
 */
static int lxc_get_unused_loop_dev(char *name_loop)
{
	int ctl_fd, free_nr, len;
	int loop_fd = -1;

	ctl_fd = open("/dev/loop-control", O_RDWR | O_CLOEXEC);
	if (ctl_fd < 0)
		return -ENODEV;

	free_nr = ioctl(ctl_fd, LOOP_CTL_GET_FREE);
	if (free_nr >= 0) {
		len = snprintf(name_loop, LO_NAME_SIZE, "/dev/loop%d", free_nr);
		if (len >= 0 && len < LO_NAME_SIZE)
			loop_fd = open(name_loop, O_RDWR | O_CLOEXEC);
	}

	/* The control descriptor is not needed once the device is open. */
	close(ctl_fd);
	return loop_fd;
}
1432
/* Attach the file @source to a free loop device.
 *
 * A free device is obtained via /dev/loop-control; only when that control
 * node is unavailable (-ENODEV) is the legacy /dev scan used instead.
 *
 * @source   : path of the backing file, opened read-write
 * @loop_dev : buffer of at least LO_NAME_SIZE bytes receiving the path of
 *             the loop device
 * @flags    : value stored in lo_flags of the loop_info64 passed to
 *             LOOP_SET_STATUS64
 *
 * Returns an open descriptor for the configured loop device, or -1 on
 * failure.
 */
int lxc_prepare_loop_dev(const char *source, char *loop_dev, int flags)
{
	int ret;
	struct loop_info64 lo64;
	int fd_img = -1, fret = -1, fd_loop = -1;

	fd_loop = lxc_get_unused_loop_dev(loop_dev);
	if (fd_loop == -ENODEV)
		fd_loop = lxc_get_unused_loop_dev_legacy(loop_dev);

	/* Without a loop device there is nothing to set up; do not go on to
	 * issue ioctls on an invalid descriptor.
	 */
	if (fd_loop < 0)
		return -1;

	fd_img = open(source, O_RDWR | O_CLOEXEC);
	if (fd_img < 0)
		goto on_error;

	ret = ioctl(fd_loop, LOOP_SET_FD, fd_img);
	if (ret < 0)
		goto on_error;

	memset(&lo64, 0, sizeof(lo64));
	lo64.lo_flags = flags;

	ret = ioctl(fd_loop, LOOP_SET_STATUS64, &lo64);
	if (ret < 0) {
		/* The backing file is attached already. Detach it again so the
		 * device is not left bound after we close our descriptor, and
		 * keep the errno of the failing ioctl for the caller.
		 */
		int saved_errno = errno;

		(void)ioctl(fd_loop, LOOP_CLR_FD, 0);
		errno = saved_errno;
		goto on_error;
	}

	fret = 0;

on_error:
	/* The loop device holds its own reference to the backing file. */
	if (fd_img >= 0)
		close(fd_img);

	if (fret < 0) {
		close(fd_loop);
		fd_loop = -1;
	}

	return fd_loop;
}
74251e49
CB
1475
/* Unmount everything that is stacked on @path.
 *
 * umount2() is repeated until it fails. A failure with EINVAL ends the
 * loop normally; any other errno is treated as fatal so that we cannot
 * spin forever. (The alternative would be to keep parsing
 * /proc/self/mountinfo, which is ugly and probably racy.)
 *
 * @path : mountpoint to unstack
 * @lazy : use MNT_DETACH for every umount2() call
 *
 * Returns the number of successful unmounts (saturating at INT_MAX), or
 * the negated errno of the fatal umount2() failure.
 */
int lxc_unstack_mountpoint(const char *path, bool lazy)
{
	int popped = 0;

	for (;;) {
		if (umount2(path, lazy ? MNT_DETACH : 0) < 0) {
			if (errno != EINVAL)
				return -errno;

			break;
		}

		/* Stop counting instead of overflowing; a stack that deep is
		 * not worth reporting accurately.
		 */
		if (popped != INT_MAX)
			popped++;

		/* Success: another mount may still be stacked underneath. */
	}

	return popped;
}
ea3a694f
CB
1507
/* Run @child_fn(@args) in a child process with its stdout and stderr
 * connected to a pipe, and optionally capture what it writes.
 *
 * @buf      : buffer receiving the output; may be NULL
 * @buf_size : size of @buf; when @buf is NULL or this is 0 no output is
 *             read
 * @child_fn : function executed in the child; it is expected not to
 *             return (if it does, the child exits with EXIT_FAILURE)
 * @args     : opaque argument handed to @child_fn
 *
 * Returns the result of wait_for_pid() for the child, or -1 if the pipe
 * or the child could not be created.
 */
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
{
	int fds[2];
	int status;
	pid_t pid;
	const bool capture = buf && buf_size > 0;

	/* Make sure our callers do not receive uninitialized memory. */
	if (capture)
		buf[0] = '\0';

	if (pipe(fds) < 0) {
		SYSERROR("failed to create pipe");
		return -1;
	}

	pid = lxc_raw_clone(0);
	if (pid < 0) {
		close(fds[0]);
		close(fds[1]);
		SYSERROR("failed to create new process");
		return -1;
	}

	if (pid == 0) {
		int dup_ret;

		/* The child only writes: drop the read-end. */
		close(fds[0]);

		/* Redirect std{err,out} to the write-end of the pipe. */
		dup_ret = dup2(fds[1], STDOUT_FILENO);
		if (dup_ret >= 0)
			dup_ret = dup2(fds[1], STDERR_FILENO);

		/* The original write-end is no longer needed. */
		close(fds[1]);

		if (dup_ret < 0) {
			SYSERROR("failed to duplicate std{err,out} file descriptor");
			_exit(EXIT_FAILURE);
		}

		/* Does not return. */
		child_fn(args);
		ERROR("failed to exec command");
		_exit(EXIT_FAILURE);
	}

	/* The parent only reads: drop the write-end. */
	close(fds[1]);

	if (capture) {
		ssize_t nread;

		nread = lxc_read_nointr(fds[0], buf, buf_size - 1);
		/* The last byte read is overwritten with the terminator
		 * (presumably to drop a trailing newline - TODO confirm).
		 */
		if (nread > 0)
			buf[nread - 1] = '\0';
	}

	status = wait_for_pid(pid);

	/* Done with the read-end of the pipe. */
	close(fds[0]);

	return status;
}
04ad7ffe 1571
d75c14e2
CB
1572bool lxc_nic_exists(char *nic)
1573{
1574#define __LXC_SYS_CLASS_NET_LEN 15 + IFNAMSIZ + 1
1575 char path[__LXC_SYS_CLASS_NET_LEN];
1576 int ret;
1577 struct stat sb;
1578
1579 if (!strcmp(nic, "none"))
1580 return true;
1581
1582 ret = snprintf(path, __LXC_SYS_CLASS_NET_LEN, "/sys/class/net/%s", nic);
1583 if (ret < 0 || (size_t)ret >= __LXC_SYS_CLASS_NET_LEN)
1584 return false;
1585
1586 ret = stat(path, &sb);
1587 if (ret < 0)
1588 return false;
1589
1590 return true;
1591}
127c6e70 1592
6222c3f4
CB
/* Round @n up to the next power of two.
 *
 * Returns @n itself if it already is a power of two, otherwise the
 * smallest power of two greater than @n. Returns 0 for @n == 0 (0 is not
 * valid input) and also when the result does not fit into 64 bits.
 */
uint64_t lxc_find_next_power2(uint64_t n)
{
	uint64_t pow2 = 1;

	if (n == 0)
		return 0;

	while (pow2 < n) {
		pow2 <<= 1;

		/* The top bit was shifted out: no 64-bit power of two is
		 * large enough.
		 */
		if (pow2 == 0)
			return 0;
	}

	return pow2;
}
1fd0f41e
CB
1610
/* Arrange for @signal to be delivered to the caller when its parent dies
 * (PR_SET_PDEATHSIG).
 *
 * If the parent pid is already 1 after the prctl() call, the caller is
 * considered orphaned and sends SIGKILL to itself.
 *
 * Returns 0 on success and -1 on failure.
 */
int lxc_set_death_signal(int signal)
{
	int rc;

	rc = prctl(PR_SET_PDEATHSIG, prctl_arg(signal), prctl_arg(0),
		   prctl_arg(0), prctl_arg(0));

	/* Check whether we have been orphaned. */
	if ((pid_t)syscall(SYS_getppid) == 1) {
		rc = kill(lxc_raw_getpid(), SIGKILL);
		if (rc < 0)
			return -1;
	}

	if (rc < 0) {
		SYSERROR("Failed to set PR_SET_PDEATHSIG to %d", signal);
		return -1;
	}

	return 0;
}
7ad37670 1637
a9d4ebc1
CB
/* Set or clear the close-on-exec flag of @fd.
 *
 * @fd      : file descriptor to modify
 * @cloexec : true to set FD_CLOEXEC, false to clear it
 *
 * Returns 0 on success (including when the flag already has the requested
 * state) and the negated errno of the failing fcntl() call otherwise.
 */
int fd_cloexec(int fd, bool cloexec)
{
	int current, wanted;

	current = fcntl(fd, F_GETFD, 0);
	if (current < 0)
		return -errno;

	wanted = cloexec ? (current | FD_CLOEXEC) : (current & ~FD_CLOEXEC);

	/* Only issue F_SETFD when the flags actually change. */
	if (wanted != current && fcntl(fd, F_SETFD, wanted) < 0)
		return -errno;

	return 0;
}
d7ab0375 1659
/* Remove the directory tree rooted at @dirname.
 *
 * Every subdirectory is destroyed recursively and then @dirname itself is
 * removed with rmdir(). Entries that are not directories are skipped and
 * never unlinked here, so rmdir() only succeeds if the directory can be
 * removed with such entries still present.
 *
 * Processing continues after an error; only the first error is logged.
 *
 * Returns 0 on success and -1 if the directory could not be opened or any
 * step failed.
 */
int recursive_destroy(char *dirname)
{
	int ret;
	struct dirent *direntp;
	DIR *dir;
	int r = 0;

	dir = opendir(dirname);
	if (!dir)
		return -1;

	while ((direntp = readdir(dir))) {
		char *pathname;
		struct stat mystat;

		if (!strcmp(direntp->d_name, ".") ||
		    !strcmp(direntp->d_name, ".."))
			continue;

		pathname = must_make_path(dirname, direntp->d_name, NULL);

		/* lstat() so that symlinks are not followed. */
		ret = lstat(pathname, &mystat);
		if (ret < 0) {
			if (!r)
				WARN("Failed to stat \"%s\"", pathname);

			r = -1;
			goto next;
		}

		/* Only directories are descended into. */
		if (!S_ISDIR(mystat.st_mode))
			goto next;

		ret = recursive_destroy(pathname);
		if (ret < 0)
			r = -1;

	next:
		free(pathname);
	}

	ret = rmdir(dirname);
	if (ret < 0) {
		if (!r)
			SYSWARN("Failed to delete \"%s\"", dirname);

		r = -1;
	}

	ret = closedir(dir);
	if (ret < 0) {
		/* Report the operation that actually failed. */
		if (!r)
			SYSWARN("Failed to close directory \"%s\"", dirname);

		r = -1;
	}

	return r;
}