src/bindings.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2
   3 #ifndef _GNU_SOURCE
   4 #define _GNU_SOURCE
   5 #endif
   6
   7 #ifndef FUSE_USE_VERSION
   8 #define FUSE_USE_VERSION 26
   9 #endif
  10
  11 #define _FILE_OFFSET_BITS 64
  12
  13 #include <dirent.h>
  14 #include <errno.h>
  15 #include <fcntl.h>
  16 #include <fuse.h>
  17 #include <inttypes.h>
  18 #include <libgen.h>
  19 #include <pthread.h>
  20 #include <sched.h>
  21 #include <stdarg.h>
  22 #include <stdbool.h>
  23 #include <stdint.h>
  24 #include <stdio.h>
  25 #include <stdlib.h>
  26 #include <string.h>
  27 #include <time.h>
  28 #include <unistd.h>
  29 #include <wait.h>
  30 #include <linux/magic.h>
  31 #include <linux/sched.h>
  32 #include <sys/epoll.h>
  33 #include <sys/mman.h>
  34 #include <sys/mount.h>
  35 #include <sys/param.h>
  36 #include <signal.h>
  37 #include <sys/socket.h>
  38 #include <sys/syscall.h>
  39 #include <sys/sysinfo.h>
  40 #include <sys/vfs.h>
  41
  42 #include "api_extensions.h"
  43 #include "bindings.h"
  44 #include "config.h"
  45 #include "cgroup_fuse.h"
  46 #include "cgroups/cgroup.h"
  47 #include "cgroups/cgroup_utils.h"
  48 #include "memory_utils.h"
  49 #include "proc_cpuview.h"
  50 #include "utils.h"
  51
  52 static bool can_use_pidfd;
  53
  54 /* Define pivot_root() if missing from the C library */
  55 #ifndef HAVE_PIVOT_ROOT
  56 static int pivot_root(const char *new_root, const char *put_old)
  57 {
  58 #ifdef __NR_pivot_root
  59         return syscall(__NR_pivot_root, new_root, put_old);
  60 #else
  61         errno = ENOSYS;
  62         return -1;
  63 #endif
  64 }
  65 #else
  66 extern int pivot_root(const char *new_root, const char *put_old);
  67 #endif
  68
  69 /*
  70  * A table caching which pid is init for a pid namespace.
  71  * When looking up which pid is init for $qpid, we first
  72  * 1. Stat /proc/$qpid/ns/pid.
  73  * 2. Check whether the ino_t is in our store.
  74  *   a. if not, fork a child in qpid's ns to send us
  75  *       ucred.pid = 1, and read the initpid.  Cache
  76  *       initpid and creation time for /proc/initpid
  77  *       in a new store entry.
  78  *   b. if so, verify that /proc/initpid still matches
  79  *       what we have saved.  If not, clear the store
  80  *       entry and go back to a.  If so, return the
  81  *       cached initpid.
  82  */
  83 struct pidns_init_store {
  84         ino_t ino;     /* inode number for /proc/$pid/ns/pid */
  85         pid_t initpid; /* the pid of nit in that ns */
  86         int init_pidfd;
  87         long int ctime; /* the time at which /proc/$initpid was created */
  88         struct pidns_init_store *next;
  89         long int lastcheck;
  90 };
  91
  92 /* lol - look at how they are allocated in the kernel */
  93 #define PIDNS_HASH_SIZE 4096
  94 #define HASH(x) ((x) % PIDNS_HASH_SIZE)
  95
  96 static struct pidns_init_store *pidns_hash_table[PIDNS_HASH_SIZE];
  97 static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER;
  98
  99 static void lock_mutex(pthread_mutex_t *l)
 100 {
 101         int ret;
 102
 103         ret = pthread_mutex_lock(l);
 104         if (ret)
 105                 log_exit("%s - returned %d\n", strerror(ret), ret);
 106 }
 107
 108 struct cgroup_ops *cgroup_ops;
 109
 110 static void unlock_mutex(pthread_mutex_t *l)
 111 {
 112         int ret;
 113
 114         ret = pthread_mutex_unlock(l);
 115         if (ret)
 116                 log_exit("%s - returned %d\n", strerror(ret), ret);
 117 }
 118
 119 static void store_lock(void)
 120 {
 121         lock_mutex(&pidns_store_mutex);
 122 }
 123
 124 static void store_unlock(void)
 125 {
 126         unlock_mutex(&pidns_store_mutex);
 127 }
 128
 129 /* /proc/       =    6
 130  *                +
 131  * <pid-as-str> =   INTTYPE_TO_STRLEN(pid_t)
 132  *                +
 133  * \0           =    1
 134  */
 135 #define LXCFS_PROC_PID_LEN \
 136         (STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + +1)
 137
 138 static int initpid_still_valid_pidfd(struct pidns_init_store *entry)
 139 {
 140         int ret;
 141
 142         if (entry->init_pidfd < 0)
 143                 return ret_errno(ENOSYS);
 144
 145         ret = pidfd_send_signal(entry->init_pidfd, 0, NULL, 0);
 146         if (ret < 0) {
 147                 if (errno == ENOSYS)
 148                         return ret_errno(ENOSYS);
 149
 150                 return 0;
 151         }
 152
 153         return 1;
 154 }
 155
 156 static int initpid_still_valid_stat(struct pidns_init_store *entry)
 157 {
 158         struct stat st;
 159         char path[LXCFS_PROC_PID_LEN];
 160
 161         snprintf(path, sizeof(path), "/proc/%d", entry->initpid);
 162         if (stat(path, &st) || entry->ctime != st.st_ctime)
 163                 return 0;
 164
 165         return 1;
 166 }
 167
 168 /* Must be called under store_lock */
 169 static bool initpid_still_valid(struct pidns_init_store *entry)
 170 {
 171         int ret;
 172
 173         ret = initpid_still_valid_pidfd(entry);
 174         if (ret < 0)
 175                 ret = initpid_still_valid_stat(entry);
 176
 177         return ret == 1;
 178 }
 179
 180 /* Must be called under store_lock */
 181 static void remove_initpid(struct pidns_init_store *entry)
 182 {
 183         struct pidns_init_store *it;
 184         int ino_hash;
 185
 186         lxcfs_debug("Removing cached entry for pid %d from init pid cache",
 187                     entry->initpid);
 188
 189         ino_hash = HASH(entry->ino);
 190         if (pidns_hash_table[ino_hash] == entry) {
 191                 pidns_hash_table[ino_hash] = entry->next;
 192                 close_prot_errno_disarm(entry->init_pidfd);
 193                 free_disarm(entry);
 194                 return;
 195         }
 196
 197         it = pidns_hash_table[ino_hash];
 198         while (it) {
 199                 if (it->next == entry) {
 200                         it->next = entry->next;
 201                         close_prot_errno_disarm(entry->init_pidfd);
 202                         free_disarm(entry);
 203                         return;
 204                 }
 205                 it = it->next;
 206         }
 207 }
 208
 209 #define PURGE_SECS 5
 210 /* Must be called under store_lock */
 211 static void prune_initpid_store(void)
 212 {
 213         static long int last_prune = 0;
 214         long int now, threshold;
 215
 216         if (!last_prune) {
 217                 last_prune = time(NULL);
 218                 return;
 219         }
 220
 221         now = time(NULL);
 222         if (now < last_prune + PURGE_SECS)
 223                 return;
 224
 225         lxcfs_debug("Pruning init pid cache");
 226
 227         last_prune = now;
 228         threshold = now - 2 * PURGE_SECS;
 229
 230         for (int i = 0; i < PIDNS_HASH_SIZE; i++) {
 231                 for (struct pidns_init_store *entry = pidns_hash_table[i], *prev = NULL; entry;) {
 232                         if (entry->lastcheck < threshold) {
 233                                 struct pidns_init_store *cur = entry;
 234
 235                                 lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid);
 236
 237                                 if (prev)
 238                                         prev->next = entry->next;
 239                                 else
 240                                         pidns_hash_table[i] = entry->next;
 241                                 entry = entry->next;
 242                                 close_prot_errno_disarm(cur->init_pidfd);
 243                                 free_disarm(cur);
 244                         } else {
 245                                 prev = entry;
 246                                 entry = entry->next;
 247                         }
 248                 }
 249         }
 250 }
 251
 252 /* Must be called under store_lock */
 253 static void save_initpid(struct stat *sb, pid_t pid)
 254 {
 255         __do_free struct pidns_init_store *entry = NULL;
 256         __do_close_prot_errno int pidfd = -EBADF;
 257         char path[LXCFS_PROC_PID_LEN];
 258         struct lxcfs_opts *opts = fuse_get_context()->private_data;
 259         struct stat st;
 260         int ino_hash;
 261
 262         if (opts && opts->use_pidfd && can_use_pidfd) {
 263                 pidfd = pidfd_open(pid, 0);
 264                 if (pidfd < 0)
 265                         return;
 266         }
 267
 268         snprintf(path, sizeof(path), "/proc/%d", pid);
 269         if (stat(path, &st))
 270                 return;
 271
 272         entry = malloc(sizeof(*entry));
 273         if (entry)
 274                 return;
 275
 276         ino_hash = HASH(entry->ino);
 277         *entry = (struct pidns_init_store){
 278                 .ino            = sb->st_ino,
 279                 .initpid        = pid,
 280                 .ctime          = st.st_ctime,
 281                 .next           = pidns_hash_table[ino_hash],
 282                 .lastcheck      = time(NULL),
 283                 .init_pidfd     = move_fd(pidfd),
 284         };
 285         pidns_hash_table[ino_hash] = move_ptr(entry);
 286
 287         lxcfs_debug("Added cache entry %d for pid %d to init pid cache", ino_hash, pid);
 288 }
 289
 290 /*
 291  * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
 292  * entry for the inode number and creation time.  Verify that the init pid
 293  * is still valid.  If not, remove it.  Return the entry if valid, NULL
 294  * otherwise.
 295  * Must be called under store_lock
 296  */
 297 static struct pidns_init_store *lookup_verify_initpid(struct stat *sb)
 298 {
 299         struct pidns_init_store *entry = pidns_hash_table[HASH(sb->st_ino)];
 300
 301         while (entry) {
 302                 if (entry->ino == sb->st_ino) {
 303                         if (initpid_still_valid(entry)) {
 304                                 entry->lastcheck = time(NULL);
 305                                 return entry;
 306                         }
 307
 308                         remove_initpid(entry);
 309                         return NULL;
 310                 }
 311                 entry = entry->next;
 312         }
 313
 314         return NULL;
 315 }
 316
 317 static int send_creds_clone_wrapper(void *arg)
 318 {
 319         int sock = PTR_TO_INT(arg);
 320         char v = '1'; /* we are the child */
 321         struct ucred cred = {
 322             .uid = 0,
 323             .gid = 0,
 324             .pid = 1,
 325         };
 326
 327         return send_creds(sock, &cred, v, true) != SEND_CREDS_OK;
 328 }
 329
 330 /*
 331  * Let's use the "standard stack limit" (i.e. glibc thread size default) for
 332  * stack sizes: 8MB.
 333  */
 334 #define __LXCFS_STACK_SIZE (8 * 1024 * 1024)
 335 static pid_t lxcfs_clone(int (*fn)(void *), void *arg, int flags)
 336 {
 337         pid_t ret;
 338         void *stack;
 339
 340         stack = malloc(__LXCFS_STACK_SIZE);
 341         if (!stack)
 342                 return ret_errno(ENOMEM);
 343
 344 #ifdef __ia64__
 345         ret = __clone2(fn, stack, __LXCFS_STACK_SIZE, flags | SIGCHLD, arg, NULL);
 346 #else
 347         ret = clone(fn, stack + __LXCFS_STACK_SIZE, flags | SIGCHLD, arg, NULL);
 348 #endif
 349         return ret;
 350 }
 351
 352 #define LXCFS_PROC_PID_NS_LEN                                    \
 353         (STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + \
 354          STRLITERALLEN("/ns/pid") + 1)
 355
 356 /*
 357  * clone a task which switches to @task's namespace and writes '1'.
 358  * over a unix sock so we can read the task's reaper's pid in our
 359  * namespace
 360  *
 361  * Note: glibc's fork() does not respect pidns, which can lead to failed
 362  * assertions inside glibc (and thus failed forks) if the child's pid in
 363  * the pidns and the parent pid outside are identical. Using clone prevents
 364  * this issue.
 365  */
 366 static void write_task_init_pid_exit(int sock, pid_t target)
 367 {
 368         __do_close_prot_errno int fd = -EBADF;
 369         char path[LXCFS_PROC_PID_NS_LEN];
 370         pid_t pid;
 371
 372         snprintf(path, sizeof(path), "/proc/%d/ns/pid", (int)target);
 373         fd = open(path, O_RDONLY | O_CLOEXEC);
 374         if (fd < 0)
 375                 log_exit("write_task_init_pid_exit open of ns/pid");
 376
 377         if (setns(fd, 0))
 378                 log_exit("Failed to setns to pid namespace of process %d", target);
 379
 380         pid = lxcfs_clone(send_creds_clone_wrapper, INT_TO_PTR(sock), 0);
 381         if (pid < 0)
 382                 _exit(EXIT_FAILURE);
 383
 384         if (pid != 0) {
 385                 if (!wait_for_pid(pid))
 386                         _exit(EXIT_FAILURE);
 387
 388                 _exit(EXIT_SUCCESS);
 389         }
 390 }
 391
 392 static pid_t get_init_pid_for_task(pid_t task)
 393 {
 394         char v = '0';
 395         pid_t pid_ret = -1;
 396         pid_t pid;
 397         int sock[2];
 398         struct ucred cred;
 399
 400         if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0)
 401                 return -1;
 402
 403         pid = fork();
 404         if (pid < 0)
 405                 goto out;
 406
 407         if (pid == 0) {
 408                 close(sock[1]);
 409                 write_task_init_pid_exit(sock[0], task);
 410                 _exit(EXIT_SUCCESS);
 411         }
 412
 413         if (!recv_creds(sock[1], &cred, &v))
 414                 goto out;
 415
 416         pid_ret = cred.pid;
 417
 418 out:
 419         close(sock[0]);
 420         close(sock[1]);
 421         if (pid > 0)
 422                 wait_for_pid(pid);
 423
 424         return pid_ret;
 425 }
 426
 427 pid_t lookup_initpid_in_store(pid_t pid)
 428 {
 429         pid_t answer = 0;
 430         char path[LXCFS_PROC_PID_NS_LEN];
 431         struct stat st;
 432         struct pidns_init_store *entry;
 433
 434         snprintf(path, sizeof(path), "/proc/%d/ns/pid", pid);
 435
 436         store_lock();
 437         if (stat(path, &st))
 438                 goto out;
 439
 440         entry = lookup_verify_initpid(&st);
 441         if (entry) {
 442                 answer = entry->initpid;
 443                 goto out;
 444         }
 445
 446         answer = get_init_pid_for_task(pid);
 447         if (answer > 0)
 448                 save_initpid(&st, answer);
 449
 450 out:
 451         /*
 452          * Prune at the end in case we're returning the value we were about to
 453          * return.
 454          */
 455         prune_initpid_store();
 456
 457         store_unlock();
 458
 459         return answer;
 460 }
 461
 462 /*
 463  * Functions needed to setup cgroups in the __constructor__.
 464  */
 465
 466 static bool umount_if_mounted(void)
 467 {
 468         if (umount2(BASEDIR, MNT_DETACH) < 0 && errno != EINVAL) {
 469                 lxcfs_error("Failed to unmount %s: %s.\n", BASEDIR, strerror(errno));
 470                 return false;
 471         }
 472         return true;
 473 }
 474
 475 /* __typeof__ should be safe to use with all compilers. */
 476 typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;
 477 static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val)
 478 {
 479         return (fs->f_type == (fs_type_magic)magic_val);
 480 }
 481
 482 /*
 483  * looking at fs/proc_namespace.c, it appears we can
 484  * actually expect the rootfs entry to very specifically contain
 485  * " - rootfs rootfs "
 486  * IIUC, so long as we've chrooted so that rootfs is not our root,
 487  * the rootfs entry should always be skipped in mountinfo contents.
 488  */
 489 static bool is_on_ramfs(void)
 490 {
 491         __do_free char *line = NULL;
 492         __do_free void *fopen_cache = NULL;
 493         __do_fclose FILE *f = NULL;
 494         size_t len = 0;
 495
 496         f = fopen_cached("/proc/self/mountinfo", "re", &fopen_cache);
 497         if (!f)
 498                 return false;
 499
 500         while (getline(&line, &len, f) != -1) {
 501                 int i;
 502                 char *p, *p2;
 503
 504                 for (p = line, i = 0; p && i < 4; i++)
 505                         p = strchr(p + 1, ' ');
 506                 if (!p)
 507                         continue;
 508
 509                 p2 = strchr(p + 1, ' ');
 510                 if (!p2)
 511                         continue;
 512                 *p2 = '\0';
 513                 if (strcmp(p + 1, "/") == 0) {
 514                         /* This is '/'. Is it the ramfs? */
 515                         p = strchr(p2 + 1, '-');
 516                         if (p && strncmp(p, "- rootfs rootfs ", 16) == 0)
 517                                 return true;
 518                 }
 519         }
 520
 521         return false;
 522 }
 523
 524 static int pivot_enter()
 525 {
 526         __do_close_prot_errno int oldroot = -EBADF, newroot = -EBADF;
 527
 528         oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
 529         if (oldroot < 0)
 530                 return log_error_errno(-1, errno,
 531                                        "Failed to open old root for fchdir");
 532
 533         newroot = open(ROOTDIR, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
 534         if (newroot < 0)
 535                 return log_error_errno(-1, errno,
 536                                        "Failed to open new root for fchdir");
 537
 538         /* change into new root fs */
 539         if (fchdir(newroot) < 0)
 540                 return log_error_errno(-1,
 541                                        errno, "Failed to change directory to new rootfs: %s",
 542                                        ROOTDIR);
 543
 544         /* pivot_root into our new root fs */
 545         if (pivot_root(".", ".") < 0)
 546                 return log_error_errno(-1, errno,
 547                                        "pivot_root() syscall failed: %s",
 548                                        strerror(errno));
 549
 550         /*
 551          * At this point the old-root is mounted on top of our new-root.
 552          * To unmounted it we must not be chdir'd into it, so escape back
 553          * to the old-root.
 554          */
 555         if (fchdir(oldroot) < 0)
 556                 return log_error_errno(-1, errno, "Failed to enter old root");
 557
 558         if (umount2(".", MNT_DETACH) < 0)
 559                 return log_error_errno(-1, errno, "Failed to detach old root");
 560
 561         if (fchdir(newroot) < 0)
 562                 return log_error_errno(-1, errno, "Failed to re-enter new root");
 563
 564         return 0;
 565 }
 566
 567 static int chroot_enter()
 568 {
 569         if (mount(ROOTDIR, "/", NULL, MS_REC | MS_BIND, NULL)) {
 570                 lxcfs_error("Failed to recursively bind-mount %s into /.", ROOTDIR);
 571                 return -1;
 572         }
 573
 574         if (chroot(".") < 0) {
 575                 lxcfs_error("Call to chroot() failed: %s.\n", strerror(errno));
 576                 return -1;
 577         }
 578
 579         if (chdir("/") < 0) {
 580                 lxcfs_error("Failed to change directory: %s.\n", strerror(errno));
 581                 return -1;
 582         }
 583
 584         return 0;
 585 }
 586
 587 static int permute_and_enter(void)
 588 {
 589         struct statfs sb;
 590
 591         if (statfs("/", &sb) < 0) {
 592                 lxcfs_error("%s\n", "Could not stat / mountpoint.");
 593                 return -1;
 594         }
 595
 596         /* has_fs_type() is not reliable. When the ramfs is a tmpfs it will
 597          * likely report TMPFS_MAGIC. Hence, when it reports no we still check
 598          * /proc/1/mountinfo. */
 599         if (has_fs_type(&sb, RAMFS_MAGIC) || is_on_ramfs())
 600                 return chroot_enter();
 601
 602         if (pivot_enter() < 0) {
 603                 lxcfs_error("%s\n", "Could not perform pivot root.");
 604                 return -1;
 605         }
 606
 607         return 0;
 608 }
 609
 610 /* Prepare our new clean root. */
 611 static int permute_prepare(void)
 612 {
 613         if (mkdir(ROOTDIR, 0700) < 0 && errno != EEXIST) {
 614                 lxcfs_error("%s\n", "Failed to create directory for new root.");
 615                 return -1;
 616         }
 617
 618         if (mount("/", ROOTDIR, NULL, MS_BIND, 0) < 0) {
 619                 lxcfs_error("Failed to bind-mount / for new root: %s.\n", strerror(errno));
 620                 return -1;
 621         }
 622
 623         if (mount(RUNTIME_PATH, ROOTDIR RUNTIME_PATH, NULL, MS_BIND, 0) < 0) {
 624                 lxcfs_error("Failed to bind-mount /run into new root: %s.\n", strerror(errno));
 625                 return -1;
 626         }
 627
 628         if (mount(BASEDIR, ROOTDIR BASEDIR, NULL, MS_REC | MS_MOVE, 0) < 0) {
 629                 printf("Failed to move " BASEDIR " into new root: %s.\n", strerror(errno));
 630                 return -1;
 631         }
 632
 633         return 0;
 634 }
 635
 636 /* Calls chroot() on ramfs, pivot_root() in all other cases. */
 637 static bool permute_root(void)
 638 {
 639         /* Prepare new root. */
 640         if (permute_prepare() < 0)
 641                 return false;
 642
 643         /* Pivot into new root. */
 644         if (permute_and_enter() < 0)
 645                 return false;
 646
 647         return true;
 648 }
 649
 650 static bool cgfs_prepare_mounts(void)
 651 {
 652         if (!mkdir_p(BASEDIR, 0700)) {
 653                 lxcfs_error("%s\n", "Failed to create lxcfs cgroup mountpoint.");
 654                 return false;
 655         }
 656
 657         if (!umount_if_mounted()) {
 658                 lxcfs_error("%s\n", "Failed to clean up old lxcfs cgroup mountpoint.");
 659                 return false;
 660         }
 661
 662         if (unshare(CLONE_NEWNS) < 0) {
 663                 lxcfs_error("Failed to unshare mount namespace: %s.\n", strerror(errno));
 664                 return false;
 665         }
 666
 667         cgroup_ops->mntns_fd = preserve_ns(getpid(), "mnt");
 668         if (cgroup_ops->mntns_fd < 0) {
 669                 lxcfs_error("Failed to preserve mount namespace: %s.\n", strerror(errno));
 670                 return false;
 671         }
 672
 673         if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0) < 0) {
 674                 lxcfs_error("Failed to remount / private: %s.\n", strerror(errno));
 675                 return false;
 676         }
 677
 678         if (mount("tmpfs", BASEDIR, "tmpfs", 0, "size=100000,mode=700") < 0) {
 679                 lxcfs_error("%s\n", "Failed to mount tmpfs over lxcfs cgroup mountpoint.");
 680                 return false;
 681         }
 682
 683         return true;
 684 }
 685
 686 static bool cgfs_mount_hierarchies(void)
 687 {
 688         if (!mkdir_p(BASEDIR DEFAULT_CGROUP_MOUNTPOINT, 0755))
 689                 return false;
 690
 691         if (!cgroup_ops->mount(cgroup_ops, BASEDIR))
 692                 return false;
 693
 694         for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++) {
 695                 __do_free char *path = must_make_path(BASEDIR, (*h)->mountpoint, NULL);
 696                 (*h)->fd = open(path, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW);
 697                 if ((*h)->fd < 0)
 698                         return false;
 699         }
 700
 701         return true;
 702 }
 703
 704 static bool cgfs_setup_controllers(void)
 705 {
 706         if (!cgfs_prepare_mounts())
 707                 return false;
 708
 709         if (!cgfs_mount_hierarchies())
 710                 return log_error_errno(false, errno, "Failed to set up private lxcfs cgroup mounts");
 711
 712         if (!permute_root())
 713                 return false;
 714
 715         return true;
 716 }
 717
 718 static void __attribute__((constructor)) lxcfs_init(void)
 719 {
 720         __do_close_prot_errno int init_ns = -EBADF, root_fd = -EBADF,
 721                                   pidfd = -EBADF;
 722         int i = 0;
 723         pid_t pid;
 724
 725         lxcfs_info("Running constructor %s to reload liblxcfs", __func__);
 726
 727         cgroup_ops = cgroup_init();
 728         if (!cgroup_ops) {
 729                 lxcfs_info("Failed to initialize cgroup support");
 730                 goto broken_upgrade;
 731         }
 732
 733         /* Preserve initial namespace. */
 734         pid = getpid();
 735         init_ns = preserve_ns(pid, "mnt");
 736         if (init_ns < 0) {
 737                 lxcfs_info("Failed to preserve initial mount namespace");
 738                 goto broken_upgrade;
 739         }
 740
 741         /* This function calls unshare(CLONE_NEWNS) our initial mount namespace
 742          * to privately mount lxcfs cgroups. */
 743         if (!cgfs_setup_controllers()) {
 744                 log_exit("Failed to setup private cgroup mounts for lxcfs");
 745                 goto broken_upgrade;
 746         }
 747
 748         if (setns(init_ns, 0) < 0) {
 749                 log_exit("%s - Failed to switch back to initial mount namespace", strerror(errno));
 750                 goto broken_upgrade;
 751         }
 752
 753         if (!init_cpuview()) {
 754                 log_exit("Failed to init CPU view");
 755                 goto broken_upgrade;
 756         }
 757
 758         lxcfs_info("mount namespace: %d", cgroup_ops->mntns_fd);
 759         lxcfs_info("hierarchies:");
 760
 761         for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++, i++) {
 762                 char **controller_list = (*h)->controllers;
 763                 __do_free char *controllers = NULL;
 764                 if (controller_list && *controller_list)
 765                         controllers = lxc_string_join(",", (const char **)controller_list, false);
 766                 lxcfs_info(" %2d: fd: %3d: %s", i, (*h)->fd, controllers ?: "");
 767         }
 768
 769         pidfd = pidfd_open(pid, 0);
 770         if (pidfd >= 0 && pidfd_send_signal(pidfd, 0, NULL, 0) == 0) {
 771                 can_use_pidfd = true;
 772                 lxcfs_info("Kernel supports pidfds");
 773         }
 774
 775         lxcfs_info("api_extensions:");
 776         for (i = 0; i < nr_api_extensions; i++)
 777                 lxcfs_info("- %s", api_extensions[i]);
 778
 779         root_fd = open("/", O_PATH | O_CLOEXEC);
 780         if (root_fd < 0)
 781                 lxcfs_info("%s - Failed to open root directory", strerror(errno));
 782         else if (fchdir(root_fd) < 0)
 783                 lxcfs_info("%s - Failed to change to root directory", strerror(errno));
 784
 785         return;
 786
 787 broken_upgrade:
 788         lxcfs_info("Failed to run constructor %s to reload liblxcfs", __func__);
 789 }
 790
 791 static void __attribute__((destructor)) lxcfs_exit(void)
 792 {
 793         lxcfs_info("Running destructor %s", __func__);
 794
 795         free_cpuview();
 796         cgroup_exit(cgroup_ops);
 797 }