bindings.c

   1 /* lxcfs
   2  *
   3  * Copyright © 2014-2016 Canonical, Inc
   4  * Author: Serge Hallyn <serge.hallyn@ubuntu.com>
   5  *
   6  * See COPYING file for details.
   7  */
   8
   9 #ifndef _GNU_SOURCE
  10 #define _GNU_SOURCE
  11 #endif
  12
  13 #ifndef FUSE_USE_VERSION
  14 #define FUSE_USE_VERSION 26
  15 #endif
  16
  17 #define _FILE_OFFSET_BITS 64
  18
  19 #include <dirent.h>
  20 #include <errno.h>
  21 #include <fcntl.h>
  22 #include <fuse.h>
  23 #include <inttypes.h>
  24 #include <libgen.h>
  25 #include <pthread.h>
  26 #include <sched.h>
  27 #include <stdarg.h>
  28 #include <stdbool.h>
  29 #include <stdint.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33 #include <time.h>
  34 #include <unistd.h>
  35 #include <wait.h>
  36 #include <linux/magic.h>
  37 #include <linux/sched.h>
  38 #include <sys/epoll.h>
  39 #include <sys/mman.h>
  40 #include <sys/mount.h>
  41 #include <sys/param.h>
  42 #include <sys/socket.h>
  43 #include <sys/syscall.h>
  44 #include <sys/sysinfo.h>
  45 #include <sys/vfs.h>
  46
  47 #include "bindings.h"
  48 #include "config.h"
  49 #include "cgroup_fuse.h"
  50 #include "cgroups/cgroup.h"
  51 #include "cgroups/cgroup_utils.h"
  52 #include "memory_utils.h"
  53 #include "proc_cpuview.h"
  54 #include "utils.h"
  55
  56 /* Define pivot_root() if missing from the C library */
  57 #ifndef HAVE_PIVOT_ROOT
  58 static int pivot_root(const char * new_root, const char * put_old)
  59 {
  60 #ifdef __NR_pivot_root
  61 return syscall(__NR_pivot_root, new_root, put_old);
  62 #else
  63 errno = ENOSYS;
  64 return -1;
  65 #endif
  66 }
  67 #else
  68 extern int pivot_root(const char * new_root, const char * put_old);
  69 #endif
  70
  71 /*
  72  * A table caching which pid is init for a pid namespace.
  73  * When looking up which pid is init for $qpid, we first
  74  * 1. Stat /proc/$qpid/ns/pid.
  75  * 2. Check whether the ino_t is in our store.
  76  *   a. if not, fork a child in qpid's ns to send us
  77  *       ucred.pid = 1, and read the initpid.  Cache
  78  *       initpid and creation time for /proc/initpid
  79  *       in a new store entry.
  80  *   b. if so, verify that /proc/initpid still matches
  81  *       what we have saved.  If not, clear the store
  82  *       entry and go back to a.  If so, return the
  83  *       cached initpid.
  84  */
  85 struct pidns_init_store {
  86         ino_t ino;          // inode number for /proc/$pid/ns/pid
  87         pid_t initpid;      // the pid of nit in that ns
  88         long int ctime;     // the time at which /proc/$initpid was created
  89         struct pidns_init_store *next;
  90         long int lastcheck;
  91 };
  92
  93 /* lol - look at how they are allocated in the kernel */
  94 #define PIDNS_HASH_SIZE 4096
  95 #define HASH(x) ((x) % PIDNS_HASH_SIZE)
  96
  97 static struct pidns_init_store *pidns_hash_table[PIDNS_HASH_SIZE];
  98 static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER;
  99 static void lock_mutex(pthread_mutex_t *l)
 100 {
 101         int ret;
 102
 103         if ((ret = pthread_mutex_lock(l)) != 0) {
 104                 lxcfs_error("returned:%d %s\n", ret, strerror(ret));
 105                 exit(1);
 106         }
 107 }
 108
 109 struct cgroup_ops *cgroup_ops;
 110
 111 static void unlock_mutex(pthread_mutex_t *l)
 112 {
 113         int ret;
 114
 115         if ((ret = pthread_mutex_unlock(l)) != 0) {
 116                 lxcfs_error("returned:%d %s\n", ret, strerror(ret));
 117                 exit(1);
 118         }
 119 }
 120
 121 static void store_lock(void)
 122 {
 123         lock_mutex(&pidns_store_mutex);
 124 }
 125
 126 static void store_unlock(void)
 127 {
 128         unlock_mutex(&pidns_store_mutex);
 129 }
 130
 131 /* Must be called under store_lock */
 132 static bool initpid_still_valid(struct pidns_init_store *e, struct stat *nsfdsb)
 133 {
 134         struct stat initsb;
 135         char fnam[100];
 136
 137         snprintf(fnam, 100, "/proc/%d", e->initpid);
 138         if (stat(fnam, &initsb) < 0)
 139                 return false;
 140
 141         lxcfs_debug("Comparing ctime %ld == %ld for pid %d.\n", e->ctime,
 142                     initsb.st_ctime, e->initpid);
 143
 144         if (e->ctime != initsb.st_ctime)
 145                 return false;
 146         return true;
 147 }
 148
 149 /* Must be called under store_lock */
 150 static void remove_initpid(struct pidns_init_store *e)
 151 {
 152         struct pidns_init_store *tmp;
 153         int h;
 154
 155         lxcfs_debug("Remove_initpid: removing entry for %d.\n", e->initpid);
 156
 157         h = HASH(e->ino);
 158         if (pidns_hash_table[h] == e) {
 159                 pidns_hash_table[h] = e->next;
 160                 free_disarm(e);
 161                 return;
 162         }
 163
 164         tmp = pidns_hash_table[h];
 165         while (tmp) {
 166                 if (tmp->next == e) {
 167                         tmp->next = e->next;
 168                         free_disarm(e);
 169                         return;
 170                 }
 171                 tmp = tmp->next;
 172         }
 173 }
 174
 175 #define PURGE_SECS 5
 176 /* Must be called under store_lock */
 177 static void prune_initpid_store(void)
 178 {
 179         static long int last_prune = 0;
 180         struct pidns_init_store *e, *prev, *delme;
 181         long int now, threshold;
 182         int i;
 183
 184         if (!last_prune) {
 185                 last_prune = time(NULL);
 186                 return;
 187         }
 188         now = time(NULL);
 189         if (now < last_prune + PURGE_SECS)
 190                 return;
 191
 192         lxcfs_debug("%s\n", "Pruning.");
 193
 194         last_prune = now;
 195         threshold = now - 2 * PURGE_SECS;
 196
 197         for (i = 0; i < PIDNS_HASH_SIZE; i++) {
 198                 for (prev = NULL, e = pidns_hash_table[i]; e; ) {
 199                         if (e->lastcheck < threshold) {
 200
 201                                 lxcfs_debug("Removing cached entry for %d.\n", e->initpid);
 202
 203                                 delme = e;
 204                                 if (prev)
 205                                         prev->next = e->next;
 206                                 else
 207                                         pidns_hash_table[i] = e->next;
 208                                 e = e->next;
 209                                 free_disarm(delme);
 210                         } else {
 211                                 prev = e;
 212                                 e = e->next;
 213                         }
 214                 }
 215         }
 216 }
 217
 218 /* Must be called under store_lock */
 219 static void save_initpid(struct stat *sb, pid_t pid)
 220 {
 221         struct pidns_init_store *e;
 222         char fpath[100];
 223         struct stat procsb;
 224         int h;
 225
 226         lxcfs_debug("Save_initpid: adding entry for %d.\n", pid);
 227
 228         snprintf(fpath, 100, "/proc/%d", pid);
 229         if (stat(fpath, &procsb) < 0)
 230                 return;
 231         do {
 232                 e = malloc(sizeof(*e));
 233         } while (!e);
 234         e->ino = sb->st_ino;
 235         e->initpid = pid;
 236         e->ctime = procsb.st_ctime;
 237         h = HASH(e->ino);
 238         e->next = pidns_hash_table[h];
 239         e->lastcheck = time(NULL);
 240         pidns_hash_table[h] = e;
 241 }
 242
 243 /*
 244  * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
 245  * entry for the inode number and creation time.  Verify that the init pid
 246  * is still valid.  If not, remove it.  Return the entry if valid, NULL
 247  * otherwise.
 248  * Must be called under store_lock
 249  */
 250 static struct pidns_init_store *lookup_verify_initpid(struct stat *sb)
 251 {
 252         int h = HASH(sb->st_ino);
 253         struct pidns_init_store *e = pidns_hash_table[h];
 254
 255         while (e) {
 256                 if (e->ino == sb->st_ino) {
 257                         if (initpid_still_valid(e, sb)) {
 258                                 e->lastcheck = time(NULL);
 259                                 return e;
 260                         }
 261                         remove_initpid(e);
 262                         return NULL;
 263                 }
 264                 e = e->next;
 265         }
 266
 267         return NULL;
 268 }
 269
 270 struct cgfs_files {
 271         char *name;
 272         uint32_t uid, gid;
 273         uint32_t mode;
 274 };
 275
 276 static void print_subsystems(void)
 277 {
 278         int i = 0;
 279
 280         fprintf(stderr, "mount namespace: %d\n", cgroup_ops->mntns_fd);
 281         fprintf(stderr, "hierarchies:\n");
 282         for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++, i++) {
 283                 __do_free char *controllers = lxc_string_join(",", (const char **)(*h)->controllers, false);
 284                 fprintf(stderr, " %2d: fd: %3d: %s\n", i, (*h)->fd, controllers ?: "");
 285         }
 286 }
 287
 288 bool cgfs_param_exist(const char *controller, const char *cgroup, const char *file)
 289 {
 290         int ret, cfd;
 291         size_t len;
 292         char *fnam;
 293
 294         cfd = get_cgroup_fd(controller);
 295         if (cfd < 0)
 296                 return false;
 297
 298         /* Make sure we pass a relative path to *at() family of functions.
 299          * . + /cgroup + / + file + \0
 300          */
 301         len = strlen(cgroup) + strlen(file) + 3;
 302         fnam = alloca(len);
 303         ret = snprintf(fnam, len, "%s%s/%s", dot_or_empty(cgroup), cgroup, file);
 304         if (ret < 0 || (size_t)ret >= len)
 305                 return false;
 306
 307         return (faccessat(cfd, fnam, F_OK, 0) == 0);
 308 }
 309
 310 #define SEND_CREDS_OK 0
 311 #define SEND_CREDS_NOTSK 1
 312 #define SEND_CREDS_FAIL 2
 313 static int wait_for_pid(pid_t pid);
 314 static int send_creds_clone_wrapper(void *arg);
 315
 316 /*
 317  * clone a task which switches to @task's namespace and writes '1'.
 318  * over a unix sock so we can read the task's reaper's pid in our
 319  * namespace
 320  *
 321  * Note: glibc's fork() does not respect pidns, which can lead to failed
 322  * assertions inside glibc (and thus failed forks) if the child's pid in
 323  * the pidns and the parent pid outside are identical. Using clone prevents
 324  * this issue.
 325  */
 326 static void write_task_init_pid_exit(int sock, pid_t target)
 327 {
 328         char fnam[100];
 329         pid_t pid;
 330         int fd, ret;
 331         size_t stack_size = sysconf(_SC_PAGESIZE);
 332         void *stack = alloca(stack_size);
 333
 334         ret = snprintf(fnam, sizeof(fnam), "/proc/%d/ns/pid", (int)target);
 335         if (ret < 0 || ret >= sizeof(fnam))
 336                 _exit(1);
 337
 338         fd = open(fnam, O_RDONLY);
 339         if (fd < 0) {
 340                 perror("write_task_init_pid_exit open of ns/pid");
 341                 _exit(1);
 342         }
 343         if (setns(fd, 0)) {
 344                 perror("write_task_init_pid_exit setns 1");
 345                 close(fd);
 346                 _exit(1);
 347         }
 348         pid = clone(send_creds_clone_wrapper, stack + stack_size, SIGCHLD, &sock);
 349         if (pid < 0)
 350                 _exit(1);
 351         if (pid != 0) {
 352                 if (!wait_for_pid(pid))
 353                         _exit(1);
 354                 _exit(0);
 355         }
 356 }
 357
 358 static int send_creds_clone_wrapper(void *arg) {
 359         struct ucred cred;
 360         char v;
 361         int sock = *(int *)arg;
 362
 363         /* we are the child */
 364         cred.uid = 0;
 365         cred.gid = 0;
 366         cred.pid = 1;
 367         v = '1';
 368         if (send_creds(sock, &cred, v, true) != SEND_CREDS_OK)
 369                 return 1;
 370         return 0;
 371 }
 372
 373 static pid_t get_init_pid_for_task(pid_t task)
 374 {
 375         int sock[2];
 376         pid_t pid;
 377         pid_t ret = -1;
 378         char v = '0';
 379         struct ucred cred;
 380
 381         if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0) {
 382                 perror("socketpair");
 383                 return -1;
 384         }
 385
 386         pid = fork();
 387         if (pid < 0)
 388                 goto out;
 389         if (!pid) {
 390                 close(sock[1]);
 391                 write_task_init_pid_exit(sock[0], task);
 392                 _exit(0);
 393         }
 394
 395         if (!recv_creds(sock[1], &cred, &v))
 396                 goto out;
 397         ret = cred.pid;
 398
 399 out:
 400         close(sock[0]);
 401         close(sock[1]);
 402         if (pid > 0)
 403                 wait_for_pid(pid);
 404         return ret;
 405 }
 406
 407 pid_t lookup_initpid_in_store(pid_t qpid)
 408 {
 409         pid_t answer = 0;
 410         struct stat sb;
 411         struct pidns_init_store *e;
 412         char fnam[100];
 413
 414         snprintf(fnam, 100, "/proc/%d/ns/pid", qpid);
 415         store_lock();
 416         if (stat(fnam, &sb) < 0)
 417                 goto out;
 418         e = lookup_verify_initpid(&sb);
 419         if (e) {
 420                 answer = e->initpid;
 421                 goto out;
 422         }
 423         answer = get_init_pid_for_task(qpid);
 424         if (answer > 0)
 425                 save_initpid(&sb, answer);
 426
 427 out:
 428         /* we prune at end in case we are returning
 429          * the value we were about to return */
 430         prune_initpid_store();
 431         store_unlock();
 432         return answer;
 433 }
 434
 435 static int wait_for_pid(pid_t pid)
 436 {
 437         int status, ret;
 438
 439         if (pid <= 0)
 440                 return -1;
 441
 442 again:
 443         ret = waitpid(pid, &status, 0);
 444         if (ret == -1) {
 445                 if (errno == EINTR)
 446                         goto again;
 447                 return -1;
 448         }
 449         if (ret != pid)
 450                 goto again;
 451         if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
 452                 return -1;
 453         return 0;
 454 }
 455
 456 #define INITSCOPE "/init.scope"
 457 void prune_init_slice(char *cg)
 458 {
 459         char *point;
 460         size_t cg_len = strlen(cg), initscope_len = strlen(INITSCOPE);
 461
 462         if (cg_len < initscope_len)
 463                 return;
 464
 465         point = cg + cg_len - initscope_len;
 466         if (strcmp(point, INITSCOPE) == 0) {
 467                 if (point == cg)
 468                         *(point+1) = '\0';
 469                 else
 470                         *point = '\0';
 471         }
 472 }
 473
 474 struct pid_ns_clone_args {
 475         int *cpipe;
 476         int sock;
 477         pid_t tpid;
 478         int (*wrapped) (int, pid_t); // pid_from_ns or pid_to_ns
 479 };
 480
 481 /*
 482  * Functions needed to setup cgroups in the __constructor__.
 483  */
 484
 485 static bool umount_if_mounted(void)
 486 {
 487         if (umount2(BASEDIR, MNT_DETACH) < 0 && errno != EINVAL) {
 488                 lxcfs_error("Failed to unmount %s: %s.\n", BASEDIR, strerror(errno));
 489                 return false;
 490         }
 491         return true;
 492 }
 493
 494 /* __typeof__ should be safe to use with all compilers. */
 495 typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
 496 static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val)
 497 {
 498         return (fs->f_type == (fs_type_magic)magic_val);
 499 }
 500
 501 /*
 502  * looking at fs/proc_namespace.c, it appears we can
 503  * actually expect the rootfs entry to very specifically contain
 504  * " - rootfs rootfs "
 505  * IIUC, so long as we've chrooted so that rootfs is not our root,
 506  * the rootfs entry should always be skipped in mountinfo contents.
 507  */
 508 static bool is_on_ramfs(void)
 509 {
 510         FILE *f;
 511         char *p, *p2;
 512         char *line = NULL;
 513         size_t len = 0;
 514         int i;
 515
 516         f = fopen("/proc/self/mountinfo", "r");
 517         if (!f)
 518                 return false;
 519
 520         while (getline(&line, &len, f) != -1) {
 521                 for (p = line, i = 0; p && i < 4; i++)
 522                         p = strchr(p + 1, ' ');
 523                 if (!p)
 524                         continue;
 525                 p2 = strchr(p + 1, ' ');
 526                 if (!p2)
 527                         continue;
 528                 *p2 = '\0';
 529                 if (strcmp(p + 1, "/") == 0) {
 530                         // this is '/'.  is it the ramfs?
 531                         p = strchr(p2 + 1, '-');
 532                         if (p && strncmp(p, "- rootfs rootfs ", 16) == 0) {
 533                                 free(line);
 534                                 fclose(f);
 535                                 return true;
 536                         }
 537                 }
 538         }
 539         free(line);
 540         fclose(f);
 541         return false;
 542 }
 543
 544 static int pivot_enter()
 545 {
 546         int ret = -1, oldroot = -1, newroot = -1;
 547
 548         oldroot = open("/", O_DIRECTORY | O_RDONLY);
 549         if (oldroot < 0) {
 550                 lxcfs_error("%s\n", "Failed to open old root for fchdir.");
 551                 return ret;
 552         }
 553
 554         newroot = open(ROOTDIR, O_DIRECTORY | O_RDONLY);
 555         if (newroot < 0) {
 556                 lxcfs_error("%s\n", "Failed to open new root for fchdir.");
 557                 goto err;
 558         }
 559
 560         /* change into new root fs */
 561         if (fchdir(newroot) < 0) {
 562                 lxcfs_error("Failed to change directory to new rootfs: %s.\n", ROOTDIR);
 563                 goto err;
 564         }
 565
 566         /* pivot_root into our new root fs */
 567         if (pivot_root(".", ".") < 0) {
 568                 lxcfs_error("pivot_root() syscall failed: %s.\n", strerror(errno));
 569                 goto err;
 570         }
 571
 572         /*
 573          * At this point the old-root is mounted on top of our new-root.
 574          * To unmounted it we must not be chdir'd into it, so escape back
 575          * to the old-root.
 576          */
 577         if (fchdir(oldroot) < 0) {
 578                 lxcfs_error("%s\n", "Failed to enter old root.");
 579                 goto err;
 580         }
 581
 582         if (umount2(".", MNT_DETACH) < 0) {
 583                 lxcfs_error("%s\n", "Failed to detach old root.");
 584                 goto err;
 585         }
 586
 587         if (fchdir(newroot) < 0) {
 588                 lxcfs_error("%s\n", "Failed to re-enter new root.");
 589                 goto err;
 590         }
 591
 592         ret = 0;
 593
 594 err:
 595         if (oldroot > 0)
 596                 close(oldroot);
 597         if (newroot > 0)
 598                 close(newroot);
 599
 600         return ret;
 601 }
 602
 603 static int chroot_enter()
 604 {
 605         if (mount(ROOTDIR, "/", NULL, MS_REC | MS_BIND, NULL)) {
 606                 lxcfs_error("Failed to recursively bind-mount %s into /.", ROOTDIR);
 607                 return -1;
 608         }
 609
 610         if (chroot(".") < 0) {
 611                 lxcfs_error("Call to chroot() failed: %s.\n", strerror(errno));
 612                 return -1;
 613         }
 614
 615         if (chdir("/") < 0) {
 616                 lxcfs_error("Failed to change directory: %s.\n", strerror(errno));
 617                 return -1;
 618         }
 619
 620         return 0;
 621 }
 622
 623 static int permute_and_enter(void)
 624 {
 625         struct statfs sb;
 626
 627         if (statfs("/", &sb) < 0) {
 628                 lxcfs_error("%s\n", "Could not stat / mountpoint.");
 629                 return -1;
 630         }
 631
 632         /* has_fs_type() is not reliable. When the ramfs is a tmpfs it will
 633          * likely report TMPFS_MAGIC. Hence, when it reports no we still check
 634          * /proc/1/mountinfo. */
 635         if (has_fs_type(&sb, RAMFS_MAGIC) || is_on_ramfs())
 636                 return chroot_enter();
 637
 638         if (pivot_enter() < 0) {
 639                 lxcfs_error("%s\n", "Could not perform pivot root.");
 640                 return -1;
 641         }
 642
 643         return 0;
 644 }
 645
 646 /* Prepare our new clean root. */
 647 static int permute_prepare(void)
 648 {
 649         if (mkdir(ROOTDIR, 0700) < 0 && errno != EEXIST) {
 650                 lxcfs_error("%s\n", "Failed to create directory for new root.");
 651                 return -1;
 652         }
 653
 654         if (mount("/", ROOTDIR, NULL, MS_BIND, 0) < 0) {
 655                 lxcfs_error("Failed to bind-mount / for new root: %s.\n", strerror(errno));
 656                 return -1;
 657         }
 658
 659         if (mount(RUNTIME_PATH, ROOTDIR RUNTIME_PATH, NULL, MS_BIND, 0) < 0) {
 660                 lxcfs_error("Failed to bind-mount /run into new root: %s.\n", strerror(errno));
 661                 return -1;
 662         }
 663
 664         if (mount(BASEDIR, ROOTDIR BASEDIR, NULL, MS_REC | MS_MOVE, 0) < 0) {
 665                 printf("Failed to move " BASEDIR " into new root: %s.\n", strerror(errno));
 666                 return -1;
 667         }
 668
 669         return 0;
 670 }
 671
 672 /* Calls chroot() on ramfs, pivot_root() in all other cases. */
 673 static bool permute_root(void)
 674 {
 675         /* Prepare new root. */
 676         if (permute_prepare() < 0)
 677                 return false;
 678
 679         /* Pivot into new root. */
 680         if (permute_and_enter() < 0)
 681                 return false;
 682
 683         return true;
 684 }
 685
 686 static bool cgfs_prepare_mounts(void)
 687 {
 688         if (!mkdir_p(BASEDIR, 0700)) {
 689                 lxcfs_error("%s\n", "Failed to create lxcfs cgroup mountpoint.");
 690                 return false;
 691         }
 692
 693         if (!umount_if_mounted()) {
 694                 lxcfs_error("%s\n", "Failed to clean up old lxcfs cgroup mountpoint.");
 695                 return false;
 696         }
 697
 698         if (unshare(CLONE_NEWNS) < 0) {
 699                 lxcfs_error("Failed to unshare mount namespace: %s.\n", strerror(errno));
 700                 return false;
 701         }
 702
 703         cgroup_ops->mntns_fd = preserve_ns(getpid(), "mnt");
 704         if (cgroup_ops->mntns_fd < 0) {
 705                 lxcfs_error("Failed to preserve mount namespace: %s.\n", strerror(errno));
 706                 return false;
 707         }
 708
 709         if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0) < 0) {
 710                 lxcfs_error("Failed to remount / private: %s.\n", strerror(errno));
 711                 return false;
 712         }
 713
 714         if (mount("tmpfs", BASEDIR, "tmpfs", 0, "size=100000,mode=700") < 0) {
 715                 lxcfs_error("%s\n", "Failed to mount tmpfs over lxcfs cgroup mountpoint.");
 716                 return false;
 717         }
 718
 719         return true;
 720 }
 721
 722 static bool cgfs_mount_hierarchies(void)
 723 {
 724         if (!mkdir_p(BASEDIR DEFAULT_CGROUP_MOUNTPOINT, 0755))
 725                 return false;
 726
 727         if (!cgroup_ops->mount(cgroup_ops, BASEDIR))
 728                 return false;
 729
 730         for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++) {
 731                 __do_free char *path = must_make_path(BASEDIR, (*h)->mountpoint, NULL);
 732                 (*h)->fd = open(path, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW);
 733                 if ((*h)->fd < 0)
 734                         return false;
 735         }
 736
 737         return true;
 738 }
 739
 740 static bool cgfs_setup_controllers(void)
 741 {
 742         if (!cgfs_prepare_mounts())
 743                 return false;
 744
 745         if (!cgfs_mount_hierarchies()) {
 746                 lxcfs_error("%s\n", "Failed to set up private lxcfs cgroup mounts.");
 747                 return false;
 748         }
 749
 750         if (!permute_root())
 751                 return false;
 752
 753         return true;
 754 }
 755
 756 static void __attribute__((constructor)) lxcfs_init(void)
 757 {
 758         __do_close_prot_errno int init_ns = -EBADF;
 759         char *cret;
 760         char cwd[MAXPATHLEN];
 761
 762         cgroup_ops = cgroup_init();
 763         if (!cgroup_ops)
 764                 log_exit("Failed to initialize cgroup support");
 765
 766         /* Preserve initial namespace. */
 767         init_ns = preserve_ns(getpid(), "mnt");
 768         if (init_ns < 0)
 769                 log_exit("Failed to preserve initial mount namespace");
 770
 771         cret = getcwd(cwd, MAXPATHLEN);
 772                 log_exit("%s - Could not retrieve current working directory", strerror(errno));
 773
 774         /* This function calls unshare(CLONE_NEWNS) our initial mount namespace
 775          * to privately mount lxcfs cgroups. */
 776         if (!cgfs_setup_controllers())
 777                 log_exit("Failed to setup private cgroup mounts for lxcfs");
 778
 779         if (setns(init_ns, 0) < 0)
 780                 log_exit("%s - Failed to switch back to initial mount namespace", strerror(errno));
 781
 782         if (!cret || chdir(cwd) < 0)
 783                 log_exit("%s - Could not change back to original working directory", strerror(errno));
 784
 785         if (!init_cpuview())
 786                 log_exit("Failed to init CPU view");
 787
 788         print_subsystems();
 789 }
 790
 791 static void __attribute__((destructor)) lxcfs_exit(void)
 792 {
 793         lxcfs_debug("%s\n", "Running destructor for liblxcfs");
 794         free_cpuview();
 795         cgroup_exit(cgroup_ops);
 796 }