]> git.proxmox.com Git - mirror_lxcfs.git/blob - src/bindings.c
cgroup_fuse: s/clone/lxcfs_clone/g
[mirror_lxcfs.git] / src / bindings.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6
7 #ifndef FUSE_USE_VERSION
8 #define FUSE_USE_VERSION 26
9 #endif
10
11 #define _FILE_OFFSET_BITS 64
12
13 #include <dirent.h>
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <fuse.h>
17 #include <inttypes.h>
18 #include <libgen.h>
19 #include <linux/magic.h>
20 #include <linux/sched.h>
21 #include <pthread.h>
22 #include <sched.h>
23 #include <stdarg.h>
24 #include <stdbool.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sys/epoll.h>
30 #include <sys/mman.h>
31 #include <sys/mount.h>
32 #include <sys/param.h>
33 #include <sys/socket.h>
34 #include <sys/syscall.h>
35 #include <sys/sysinfo.h>
36 #include <sys/vfs.h>
37 #include <time.h>
38 #include <unistd.h>
39 #include <wait.h>
40
41 #include "api_extensions.h"
42 #include "bindings.h"
43 #include "cgroup_fuse.h"
44 #include "cgroups/cgroup.h"
45 #include "cgroups/cgroup_utils.h"
46 #include "config.h"
47 #include "memory_utils.h"
48 #include "proc_cpuview.h"
49 #include "syscall_numbers.h"
50 #include "utils.h"
51
/* Whether pidfds may be used to keep track of cached init processes. */
static bool can_use_pidfd;

/*
 * Non-zero while the library considers itself usable. It is written by the
 * constructor and toggled from the SIGUSR2 handler.
 */
static volatile sig_atomic_t reload_successful;

/* Report whether liblxcfs is currently in its functional state. */
bool liblxcfs_functional(void)
{
	return reload_successful ? true : false;
}
60
/*
 * Use the C library's pivot_root() when it provides one; otherwise fall back
 * to invoking the raw system call ourselves.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
	return syscall(__NR_pivot_root, new_root, put_old);
}
#endif
70
/*
 * Cache mapping a pid namespace to the pid of its init process.
 *
 * Looking up the init pid for some $qpid works as follows:
 * 1. Stat /proc/$qpid/ns/pid.
 * 2. Check whether the resulting ino_t is in our store.
 *    a. If it is not, fork a child into $qpid's pid namespace which sends us
 *       ucred.pid = 1 so that we can read the init pid. Cache the init pid
 *       and the creation time of /proc/$initpid in a new store entry.
 *    b. If it is, verify that /proc/$initpid still matches what we saved.
 *       If it does, return the cached init pid. If it does not, clear the
 *       store entry and go back to a.
 */
struct pidns_init_store {
	/* Inode number of /proc/$pid/ns/pid. */
	ino_t ino;

	/* Pid of init in that namespace. */
	pid_t initpid;

	/* Pidfd referring to initpid; negative when no pidfd is held. */
	int init_pidfd;

	/* Time at which /proc/$initpid was created. */
	int64_t ctime;

	/* Next entry in the same hash bucket. */
	struct pidns_init_store *next;

	/* Time the entry was last stored or successfully verified. */
	int64_t lastcheck;
};
93
/*
 * Number of buckets in the init pid cache and the function folding a pid
 * namespace inode number into a bucket index.
 * (Original note: look at how they are allocated in the kernel.)
 */
#define PIDNS_HASH_SIZE 4096
#define HASH(x) ((x) % PIDNS_HASH_SIZE)

/* Bucket heads of the init pid cache; entries are chained through ->next. */
static struct pidns_init_store *pidns_hash_table[PIDNS_HASH_SIZE];

/* Mutex that must be held while the init pid cache is accessed. */
static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER;
100
/*
 * Acquire @l. A failure to lock is reported through log_exit() together with
 * the error code returned by pthread_mutex_lock().
 */
static void lock_mutex(pthread_mutex_t *l)
{
	const int err = pthread_mutex_lock(l);

	if (err != 0)
		log_exit("%s - returned %d\n", strerror(err), err);
}
109
/* Global cgroup driver handle; assigned from cgroup_init() in the constructor. */
struct cgroup_ops *cgroup_ops;

/*
 * Release @l. A failure to unlock is reported through log_exit() together
 * with the error code returned by pthread_mutex_unlock().
 */
static void unlock_mutex(pthread_mutex_t *l)
{
	const int err = pthread_mutex_unlock(l);

	if (err != 0)
		log_exit("%s - returned %d\n", strerror(err), err);
}
120
/*
 * Cleanup helper behind __do_unlock: unlocks the mutex that @mutex points to,
 * unless the pointer has been reset to NULL.
 */
static inline void unlock_mutex_function(pthread_mutex_t **mutex)
{
	pthread_mutex_t *held = *mutex;

	if (!held)
		return;

	unlock_mutex(held);
}
#define __do_unlock call_cleaner(unlock_mutex)
127
128 static pthread_mutex_t* __attribute__((warn_unused_result)) store_lock(void)
129 {
130 lock_mutex(&pidns_store_mutex);
131 return &pidns_store_mutex;
132 }
133
/*
 * Buffer size needed for a "/proc/<pid>" path:
 *
 *   "/proc/"      = STRLITERALLEN("/proc/")
 *   <pid-as-str>  = INTTYPE_TO_STRLEN(uint64_t)
 *   "\0"          = 1
 */
#define LXCFS_PROC_PID_LEN \
	(STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + 1)
142
143 static int initpid_still_valid_pidfd(struct pidns_init_store *entry)
144 {
145 int ret;
146
147 if (entry->init_pidfd < 0)
148 return ret_errno(ENOSYS);
149
150 ret = pidfd_send_signal(entry->init_pidfd, 0, NULL, 0);
151 if (ret < 0) {
152 if (errno == ENOSYS)
153 return ret_errno(ENOSYS);
154
155 return 0;
156 }
157
158 return 1;
159 }
160
161 static int initpid_still_valid_stat(struct pidns_init_store *entry)
162 {
163 struct stat st;
164 char path[LXCFS_PROC_PID_LEN];
165
166 snprintf(path, sizeof(path), "/proc/%d", entry->initpid);
167 if (stat(path, &st) || entry->ctime != st.st_ctime)
168 return 0;
169
170 return 1;
171 }
172
/*
 * Tell whether the init pid cached in @entry is still valid. The pidfd based
 * check is tried first; the stat(2) based check is only consulted when the
 * pidfd check reports that it cannot be used (negative return).
 *
 * Must be called under store_lock.
 */
static bool initpid_still_valid(struct pidns_init_store *entry)
{
	int verdict = initpid_still_valid_pidfd(entry);

	if (verdict >= 0)
		return verdict == 1;

	return initpid_still_valid_stat(entry) == 1;
}
184
185 /* Must be called under store_lock */
186 static void remove_initpid(struct pidns_init_store *entry)
187 {
188 struct pidns_init_store *it;
189 int ino_hash;
190
191 lxcfs_debug("Removing cached entry for pid %d from init pid cache",
192 entry->initpid);
193
194 ino_hash = HASH(entry->ino);
195 if (pidns_hash_table[ino_hash] == entry) {
196 pidns_hash_table[ino_hash] = entry->next;
197 close_prot_errno_disarm(entry->init_pidfd);
198 free_disarm(entry);
199 return;
200 }
201
202 it = pidns_hash_table[ino_hash];
203 while (it) {
204 if (it->next == entry) {
205 it->next = entry->next;
206 close_prot_errno_disarm(entry->init_pidfd);
207 free_disarm(entry);
208 return;
209 }
210 it = it->next;
211 }
212 }
213
214 #define PURGE_SECS 5
215 /* Must be called under store_lock */
216 static void prune_initpid_store(void)
217 {
218 static int64_t last_prune = 0;
219 int64_t now, threshold;
220
221 if (!last_prune) {
222 last_prune = time(NULL);
223 return;
224 }
225
226 now = time(NULL);
227 if (now < last_prune + PURGE_SECS)
228 return;
229
230 lxcfs_debug("Pruning init pid cache");
231
232 last_prune = now;
233 threshold = now - 2 * PURGE_SECS;
234
235 for (int i = 0; i < PIDNS_HASH_SIZE; i++) {
236 for (struct pidns_init_store *entry = pidns_hash_table[i], *prev = NULL; entry;) {
237 if (entry->lastcheck < threshold) {
238 struct pidns_init_store *cur = entry;
239
240 lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid);
241
242 if (prev)
243 prev->next = entry->next;
244 else
245 pidns_hash_table[i] = entry->next;
246 entry = entry->next;
247 close_prot_errno_disarm(cur->init_pidfd);
248 free_disarm(cur);
249 } else {
250 prev = entry;
251 entry = entry->next;
252 }
253 }
254 }
255 }
256
257 /* Must be called under store_lock */
258 static void save_initpid(ino_t pidns_inode, pid_t pid)
259 {
260 __do_free struct pidns_init_store *entry = NULL;
261 __do_close int pidfd = -EBADF;
262 char path[LXCFS_PROC_PID_LEN];
263 struct lxcfs_opts *opts = fuse_get_context()->private_data;
264 struct stat st;
265 int ino_hash;
266
267 if (opts && opts->use_pidfd && can_use_pidfd) {
268 pidfd = pidfd_open(pid, 0);
269 if (pidfd < 0)
270 return;
271 }
272
273 snprintf(path, sizeof(path), "/proc/%d", pid);
274 if (stat(path, &st))
275 return;
276
277 entry = malloc(sizeof(*entry));
278 if (entry)
279 return;
280
281 ino_hash = HASH(entry->ino);
282 *entry = (struct pidns_init_store){
283 .ino = pidns_inode,
284 .initpid = pid,
285 .ctime = st.st_ctime,
286 .next = pidns_hash_table[ino_hash],
287 .lastcheck = time(NULL),
288 .init_pidfd = move_fd(pidfd),
289 };
290 pidns_hash_table[ino_hash] = move_ptr(entry);
291
292 lxcfs_debug("Added cache entry %d for pid %d to init pid cache", ino_hash, pid);
293 }
294
295 /*
296 * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
297 * entry for the inode number and creation time. Verify that the init pid
298 * is still valid. If not, remove it. Return the entry if valid, NULL
299 * otherwise.
300 * Must be called under store_lock
301 */
302 static struct pidns_init_store *lookup_verify_initpid(ino_t pidns_inode)
303 {
304 struct pidns_init_store *entry = pidns_hash_table[HASH(pidns_inode)];
305
306 while (entry) {
307 if (entry->ino == pidns_inode) {
308 if (initpid_still_valid(entry)) {
309 entry->lastcheck = time(NULL);
310 return entry;
311 }
312
313 remove_initpid(entry);
314 return NULL;
315 }
316 entry = entry->next;
317 }
318
319 return NULL;
320 }
321
322 static int send_creds_clone_wrapper(void *arg)
323 {
324 int sock = PTR_TO_INT(arg);
325 char v = '1'; /* we are the child */
326 struct ucred cred = {
327 .uid = 0,
328 .gid = 0,
329 .pid = 1,
330 };
331
332 return send_creds(sock, &cred, v, true) != SEND_CREDS_OK;
333 }
334
335 /*
336 * Let's use the "standard stack limit" (i.e. glibc thread size default) for
337 * stack sizes: 8MB.
338 */
339 #define __LXCFS_STACK_SIZE (8 * 1024 * 1024)
340 pid_t lxcfs_clone(int (*fn)(void *), void *arg, int flags)
341 {
342 pid_t ret;
343 void *stack;
344
345 stack = malloc(__LXCFS_STACK_SIZE);
346 if (!stack)
347 return ret_errno(ENOMEM);
348
349 #ifdef __ia64__
350 ret = __clone2(fn, stack, __LXCFS_STACK_SIZE, flags | SIGCHLD, arg, NULL);
351 #else
352 ret = clone(fn, stack + __LXCFS_STACK_SIZE, flags | SIGCHLD, arg, NULL);
353 #endif
354 return ret;
355 }
356
357 #define LXCFS_PROC_PID_NS_LEN \
358 (STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + \
359 STRLITERALLEN("/ns/pid") + 1)
360
361 /*
362 * clone a task which switches to @task's namespace and writes '1'.
363 * over a unix sock so we can read the task's reaper's pid in our
364 * namespace
365 *
366 * Note: glibc's fork() does not respect pidns, which can lead to failed
367 * assertions inside glibc (and thus failed forks) if the child's pid in
368 * the pidns and the parent pid outside are identical. Using clone prevents
369 * this issue.
370 */
371 static void write_task_init_pid_exit(int sock, pid_t target)
372 {
373 __do_close int fd = -EBADF;
374 char path[LXCFS_PROC_PID_NS_LEN];
375 pid_t pid;
376
377 snprintf(path, sizeof(path), "/proc/%d/ns/pid", (int)target);
378 fd = open(path, O_RDONLY | O_CLOEXEC);
379 if (fd < 0)
380 log_exit("write_task_init_pid_exit open of ns/pid");
381
382 if (setns(fd, 0))
383 log_exit("Failed to setns to pid namespace of process %d", target);
384
385 pid = lxcfs_clone(send_creds_clone_wrapper, INT_TO_PTR(sock), 0);
386 if (pid < 0)
387 _exit(EXIT_FAILURE);
388
389 if (pid != 0) {
390 if (!wait_for_pid(pid))
391 _exit(EXIT_FAILURE);
392
393 _exit(EXIT_SUCCESS);
394 }
395 }
396
397 static pid_t get_init_pid_for_task(pid_t task)
398 {
399 char v = '0';
400 pid_t pid_ret = -1;
401 pid_t pid;
402 int sock[2];
403 struct ucred cred;
404
405 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0)
406 return -1;
407
408 pid = fork();
409 if (pid < 0)
410 goto out;
411
412 if (pid == 0) {
413 close(sock[1]);
414 write_task_init_pid_exit(sock[0], task);
415 _exit(EXIT_SUCCESS);
416 }
417
418 if (!recv_creds(sock[1], &cred, &v))
419 goto out;
420
421 pid_ret = cred.pid;
422
423 out:
424 close(sock[0]);
425 close(sock[1]);
426 if (pid > 0)
427 wait_for_pid(pid);
428
429 return pid_ret;
430 }
431
432 pid_t lookup_initpid_in_store(pid_t pid)
433 {
434 __do_unlock pthread_mutex_t *store_mutex = NULL;
435 pid_t answer = 0;
436 char path[LXCFS_PROC_PID_NS_LEN];
437 struct stat st;
438 struct pidns_init_store *entry;
439
440 snprintf(path, sizeof(path), "/proc/%d/ns/pid", pid);
441
442 if (stat(path, &st))
443 goto out;
444
445 store_mutex = store_lock();
446
447 entry = lookup_verify_initpid(st.st_ino);
448 if (entry) {
449 answer = entry->initpid;
450 goto out;
451 }
452
453 /* release the mutex as the following call is expensive */
454 unlock_mutex(move_ptr(store_mutex));
455 answer = get_init_pid_for_task(pid);
456 store_mutex = store_lock();
457
458 if (answer > 0)
459 save_initpid(st.st_ino, answer);
460
461 out:
462 /*
463 * Prune at the end in case we're returning the value we were about to
464 * return.
465 */
466 prune_initpid_store();
467
468 return answer;
469 }
470
471 /*
472 * Functions needed to setup cgroups in the __constructor__.
473 */
474
475 static bool umount_if_mounted(void)
476 {
477 if (umount2(BASEDIR, MNT_DETACH) < 0 && errno != EINVAL) {
478 lxcfs_error("Failed to unmount %s: %s.\n", BASEDIR, strerror(errno));
479 return false;
480 }
481 return true;
482 }
483
/*
 * Type of the f_type member of struct statfs. __typeof__ should be safe to
 * use with all compilers.
 */
typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;

/* Tell whether the filesystem described by @fs has the magic @magic_val. */
static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val)
{
	fs_type_magic actual = fs->f_type;

	return actual == magic_val;
}
490
491 /*
492 * looking at fs/proc_namespace.c, it appears we can
493 * actually expect the rootfs entry to very specifically contain
494 * " - rootfs rootfs "
495 * IIUC, so long as we've chrooted so that rootfs is not our root,
496 * the rootfs entry should always be skipped in mountinfo contents.
497 */
498 static bool is_on_ramfs(void)
499 {
500 __do_free char *line = NULL;
501 __do_free void *fopen_cache = NULL;
502 __do_fclose FILE *f = NULL;
503 size_t len = 0;
504
505 f = fopen_cached("/proc/self/mountinfo", "re", &fopen_cache);
506 if (!f)
507 return false;
508
509 while (getline(&line, &len, f) != -1) {
510 int i;
511 char *p, *p2;
512
513 for (p = line, i = 0; p && i < 4; i++)
514 p = strchr(p + 1, ' ');
515 if (!p)
516 continue;
517
518 p2 = strchr(p + 1, ' ');
519 if (!p2)
520 continue;
521 *p2 = '\0';
522 if (strcmp(p + 1, "/") == 0) {
523 /* This is '/'. Is it the ramfs? */
524 p = strchr(p2 + 1, '-');
525 if (p && strncmp(p, "- rootfs rootfs ", 16) == 0)
526 return true;
527 }
528 }
529
530 return false;
531 }
532
533 static int pivot_enter()
534 {
535 __do_close int oldroot = -EBADF, newroot = -EBADF;
536
537 oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
538 if (oldroot < 0)
539 return log_error_errno(-1, errno,
540 "Failed to open old root for fchdir");
541
542 newroot = open(ROOTDIR, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
543 if (newroot < 0)
544 return log_error_errno(-1, errno,
545 "Failed to open new root for fchdir");
546
547 /* change into new root fs */
548 if (fchdir(newroot) < 0)
549 return log_error_errno(-1,
550 errno, "Failed to change directory to new rootfs: %s",
551 ROOTDIR);
552
553 /* pivot_root into our new root fs */
554 if (pivot_root(".", ".") < 0)
555 return log_error_errno(-1, errno,
556 "pivot_root() syscall failed: %s",
557 strerror(errno));
558
559 /*
560 * At this point the old-root is mounted on top of our new-root.
561 * To unmounted it we must not be chdir'd into it, so escape back
562 * to the old-root.
563 */
564 if (fchdir(oldroot) < 0)
565 return log_error_errno(-1, errno, "Failed to enter old root");
566
567 if (umount2(".", MNT_DETACH) < 0)
568 return log_error_errno(-1, errno, "Failed to detach old root");
569
570 if (fchdir(newroot) < 0)
571 return log_error_errno(-1, errno, "Failed to re-enter new root");
572
573 return 0;
574 }
575
576 static int chroot_enter()
577 {
578 if (mount(ROOTDIR, "/", NULL, MS_REC | MS_BIND, NULL)) {
579 lxcfs_error("Failed to recursively bind-mount %s into /.", ROOTDIR);
580 return -1;
581 }
582
583 if (chroot(".") < 0) {
584 lxcfs_error("Call to chroot() failed: %s.\n", strerror(errno));
585 return -1;
586 }
587
588 if (chdir("/") < 0) {
589 lxcfs_error("Failed to change directory: %s.\n", strerror(errno));
590 return -1;
591 }
592
593 return 0;
594 }
595
/*
 * Enter the prepared new root: through chroot_enter() when "/" is a ramfs,
 * through pivot_enter() in every other case.
 *
 * Returns 0 on success and -1 on failure.
 */
static int permute_and_enter(void)
{
	struct statfs sb;

	if (statfs("/", &sb) < 0) {
		lxcfs_error("%s\n", "Could not stat / mountpoint.");
		return -1;
	}

	/*
	 * has_fs_type() is not reliable. When the ramfs is a tmpfs it will
	 * likely report TMPFS_MAGIC. Hence, when it reports no we still ask
	 * is_on_ramfs(), which inspects /proc/self/mountinfo.
	 */
	if (has_fs_type(&sb, RAMFS_MAGIC) || is_on_ramfs())
		return chroot_enter();

	if (pivot_enter() >= 0)
		return 0;

	lxcfs_error("%s\n", "Could not perform pivot root.");
	return -1;
}
618
619 /* Prepare our new clean root. */
620 static int permute_prepare(void)
621 {
622 if (mkdir(ROOTDIR, 0700) < 0 && errno != EEXIST) {
623 lxcfs_error("%s\n", "Failed to create directory for new root.");
624 return -1;
625 }
626
627 if (mount("/", ROOTDIR, NULL, MS_BIND, 0) < 0) {
628 lxcfs_error("Failed to bind-mount / for new root: %s.\n", strerror(errno));
629 return -1;
630 }
631
632 if (mount(RUNTIME_PATH, ROOTDIR RUNTIME_PATH, NULL, MS_BIND, 0) < 0) {
633 lxcfs_error("Failed to bind-mount /run into new root: %s.\n", strerror(errno));
634 return -1;
635 }
636
637 if (mount(BASEDIR, ROOTDIR BASEDIR, NULL, MS_REC | MS_MOVE, 0) < 0) {
638 printf("Failed to move " BASEDIR " into new root: %s.\n", strerror(errno));
639 return -1;
640 }
641
642 return 0;
643 }
644
/*
 * Switch into the new root: chroot() on ramfs, pivot_root() in all other
 * cases. The new root is prepared first and only entered if that worked.
 */
static bool permute_root(void)
{
	return permute_prepare() >= 0 && permute_and_enter() >= 0;
}
658
659 static bool cgfs_prepare_mounts(void)
660 {
661 if (!mkdir_p(BASEDIR, 0700)) {
662 lxcfs_error("%s\n", "Failed to create lxcfs cgroup mountpoint.");
663 return false;
664 }
665
666 if (!umount_if_mounted()) {
667 lxcfs_error("%s\n", "Failed to clean up old lxcfs cgroup mountpoint.");
668 return false;
669 }
670
671 if (unshare(CLONE_NEWNS) < 0) {
672 lxcfs_error("Failed to unshare mount namespace: %s.\n", strerror(errno));
673 return false;
674 }
675
676 cgroup_ops->mntns_fd = preserve_ns(getpid(), "mnt");
677 if (cgroup_ops->mntns_fd < 0) {
678 lxcfs_error("Failed to preserve mount namespace: %s.\n", strerror(errno));
679 return false;
680 }
681
682 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0) < 0) {
683 lxcfs_error("Failed to remount / private: %s.\n", strerror(errno));
684 return false;
685 }
686
687 if (mount("tmpfs", BASEDIR, "tmpfs", 0, "size=100000,mode=700") < 0) {
688 lxcfs_error("%s\n", "Failed to mount tmpfs over lxcfs cgroup mountpoint.");
689 return false;
690 }
691
692 return true;
693 }
694
695 static bool cgfs_mount_hierarchies(void)
696 {
697 if (!mkdir_p(BASEDIR DEFAULT_CGROUP_MOUNTPOINT, 0755))
698 return false;
699
700 if (!cgroup_ops->mount(cgroup_ops, BASEDIR))
701 return false;
702
703 for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++) {
704 __do_free char *path = must_make_path(BASEDIR, (*h)->mountpoint, NULL);
705 (*h)->fd = open(path, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW);
706 if ((*h)->fd < 0)
707 return false;
708 }
709
710 return true;
711 }
712
/*
 * Set up the private lxcfs cgroup mounts: prepare the mount namespace, mount
 * the hierarchies and finally switch into the new root.
 *
 * Returns true only if all three steps succeeded.
 */
static bool cgfs_setup_controllers(void)
{
	if (!cgfs_prepare_mounts())
		return false;

	if (!cgfs_mount_hierarchies())
		return log_error_errno(false, errno, "Failed to set up private lxcfs cgroup mounts");

	return permute_root() ? true : false;
}
726
727 static void sigusr2_toggle_virtualization(int signo, siginfo_t *info, void *extra)
728 {
729 int ret;
730
731 if (reload_successful) {
732 reload_successful = 0;
733
734 /* write() is async signal safe */
735 ret = write(STDERR_FILENO,
736 "Switched into non-virtualization mode\n",
737 STRLITERALLEN("Switched into non-virtualization mode\n"));
738 if (ret < 0)
739 goto please_compiler;
740 } else {
741 reload_successful = 1;
742
743 /* write() is async signal safe */
744 ret = write(STDERR_FILENO, "Switched into virtualization mode\n",
745 STRLITERALLEN("Switched into virtualization mode\n"));
746 if (ret < 0)
747 goto please_compiler;
748 }
749
750 please_compiler:
751 /*
752 * The write() syscall is a function whose return value needs to be
753 * checked. Otherwise the compiler will warn. This is how we
754 * please our master. Another one could be to use
755 * syscall(__NR_write, ...) directly but whatever.
756 */
757 return;
758 }
759
760 static void __attribute__((constructor)) lxcfs_init(void)
761 {
762 __do_close int init_ns = -EBADF, root_fd = -EBADF,
763 pidfd = -EBADF;
764 int i = 0;
765 pid_t pid;
766
767 lxcfs_info("Running constructor %s to reload liblxcfs", __func__);
768
769 cgroup_ops = cgroup_init();
770 if (!cgroup_ops) {
771 lxcfs_info("Failed to initialize cgroup support");
772 goto broken_upgrade;
773 }
774
775 /* Preserve initial namespace. */
776 pid = getpid();
777 init_ns = preserve_ns(pid, "mnt");
778 if (init_ns < 0) {
779 lxcfs_info("Failed to preserve initial mount namespace");
780 goto broken_upgrade;
781 }
782
783 /* This function calls unshare(CLONE_NEWNS) our initial mount namespace
784 * to privately mount lxcfs cgroups. */
785 if (!cgfs_setup_controllers()) {
786 log_exit("Failed to setup private cgroup mounts for lxcfs");
787 goto broken_upgrade;
788 }
789
790 if (setns(init_ns, 0) < 0) {
791 log_exit("%s - Failed to switch back to initial mount namespace", strerror(errno));
792 goto broken_upgrade;
793 }
794
795 if (!init_cpuview()) {
796 log_exit("Failed to init CPU view");
797 goto broken_upgrade;
798 }
799
800 lxcfs_info("mount namespace: %d", cgroup_ops->mntns_fd);
801 lxcfs_info("hierarchies:");
802
803 for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++, i++) {
804 char **controller_list = (*h)->controllers;
805 __do_free char *controllers = NULL;
806 if (controller_list && *controller_list)
807 controllers = lxc_string_join(",", (const char **)controller_list, false);
808 lxcfs_info(" %2d: fd: %3d: %s", i, (*h)->fd, controllers ?: "");
809 }
810
811 pidfd = pidfd_open(pid, 0);
812 if (pidfd >= 0 && pidfd_send_signal(pidfd, 0, NULL, 0) == 0) {
813 can_use_pidfd = true;
814 lxcfs_info("Kernel supports pidfds");
815 }
816
817 lxcfs_info("api_extensions:");
818 for (i = 0; i < nr_api_extensions; i++)
819 lxcfs_info("- %s", api_extensions[i]);
820
821 root_fd = open("/", O_PATH | O_CLOEXEC);
822 if (root_fd < 0)
823 lxcfs_info("%s - Failed to open root directory", strerror(errno));
824 else if (fchdir(root_fd) < 0)
825 lxcfs_info("%s - Failed to change to root directory", strerror(errno));
826
827 if (install_signal_handler(SIGUSR2, sigusr2_toggle_virtualization)) {
828 lxcfs_info("%s - Failed to install SIGUSR2 signal handler", strerror(errno));
829 goto broken_upgrade;
830 }
831
832 reload_successful = 1;
833 return;
834
835 broken_upgrade:
836 reload_successful = 0;
837 lxcfs_info("Failed to run constructor %s to reload liblxcfs", __func__);
838 }
839
840 static void __attribute__((destructor)) lxcfs_exit(void)
841 {
842 lxcfs_info("Running destructor %s", __func__);
843
844 free_cpuview();
845 cgroup_exit(cgroup_ops);
846 }