]> git.proxmox.com Git - mirror_lxcfs.git/blob - src/bindings.c
bindings: s/get_init_pid_for_task()/scm_init_pid()/g
[mirror_lxcfs.git] / src / bindings.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE
5 #endif
6
7 #ifndef FUSE_USE_VERSION
8 #define FUSE_USE_VERSION 26
9 #endif
10
11 #define _FILE_OFFSET_BITS 64
12
13 #include <dirent.h>
14 #include <errno.h>
15 #include <fcntl.h>
16 #include <fuse.h>
17 #include <inttypes.h>
18 #include <libgen.h>
19 #include <linux/magic.h>
20 #include <linux/sched.h>
21 #include <pthread.h>
22 #include <sched.h>
23 #include <stdarg.h>
24 #include <stdbool.h>
25 #include <stdint.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <sys/epoll.h>
30 #include <sys/mman.h>
31 #include <sys/mount.h>
32 #include <sys/param.h>
33 #include <sys/socket.h>
34 #include <sys/syscall.h>
35 #include <sys/sysinfo.h>
36 #include <sys/vfs.h>
37 #include <time.h>
38 #include <unistd.h>
39 #include <wait.h>
40
41 #include "api_extensions.h"
42 #include "bindings.h"
43 #include "cgroup_fuse.h"
44 #include "cgroups/cgroup.h"
45 #include "cgroups/cgroup_utils.h"
46 #include "config.h"
47 #include "memory_utils.h"
48 #include "proc_cpuview.h"
49 #include "syscall_numbers.h"
50 #include "utils.h"
51
/* Set by the constructor when a pidfd probe on our own process succeeds. */
static bool can_use_pidfd;

/*
 * Non-zero while the library counts as usable. Written by the constructor
 * and toggled from the SIGUSR2 handler, hence volatile sig_atomic_t.
 */
static volatile sig_atomic_t reload_successful;

/* Report whether the library is currently flagged as functional. */
bool liblxcfs_functional(void)
{
	if (reload_successful)
		return true;

	return false;
}
60
/*
 * pivot_root(): use the C library's declaration when the build detected one
 * (HAVE_PIVOT_ROOT); otherwise provide a thin wrapper around the raw syscall.
 */
#ifdef HAVE_PIVOT_ROOT
extern int pivot_root(const char *new_root, const char *put_old);
#else
static int pivot_root(const char *new_root, const char *put_old)
{
	return syscall(__NR_pivot_root, new_root, put_old);
}
#endif
70
/*
 * Cache of "which pid is init" per pid namespace.
 *
 * To resolve the init pid for some $qpid:
 *   1. stat /proc/$qpid/ns/pid to obtain the namespace inode number.
 *   2. Look that inode up in the store.
 *      a. Miss: spawn a helper inside $qpid's pid namespace that sends us
 *         ucred.pid = 1; the pid we receive is the namespace's init as seen
 *         from here. Store it together with the ctime of /proc/<initpid>
 *         in a new entry.
 *      b. Hit: check that /proc/<initpid> still matches what was saved.
 *         If it does, return the cached init pid; if not, drop the entry
 *         and proceed as in a.
 */
struct pidns_init_store {
	/* inode number of /proc/$pid/ns/pid */
	ino_t ino;
	/* pid of init in that namespace */
	pid_t initpid;
	/* pidfd for initpid, or a negative value when none is held */
	int init_pidfd;
	/* st_ctime of /proc/$initpid at the time the entry was saved */
	int64_t ctime;
	/* next entry in the same hash bucket */
	struct pidns_init_store *next;
	/* time(NULL) of the last successful lookup; drives pruning */
	int64_t lastcheck;
};
93
/*
 * Bucket count of the init pid cache and the inode -> bucket mapping.
 * The plain modulo relies on how the kernel hands out namespace inode
 * numbers (see the kernel's allocation scheme).
 */
#define PIDNS_HASH_SIZE 4096
#define HASH(x) ((x) % PIDNS_HASH_SIZE)

/* Bucket heads of the cache, and the mutex taken by store_lock(). */
static struct pidns_init_store *pidns_hash_table[PIDNS_HASH_SIZE];
static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER;
100
/*
 * Lock @l. A non-zero result from pthread_mutex_lock() is reported through
 * log_exit() together with its strerror() text.
 */
static void mutex_lock(pthread_mutex_t *l)
{
	const int err = pthread_mutex_lock(l);

	if (err != 0)
		log_exit("%s - returned %d\n", strerror(err), err);
}
109
/* Global cgroup driver handle; assigned from cgroup_init() in the constructor. */
struct cgroup_ops *cgroup_ops;

/*
 * Unlock @l. A non-zero result from pthread_mutex_unlock() is reported
 * through log_exit() together with its strerror() text.
 */
static void mutex_unlock(pthread_mutex_t *l)
{
	const int err = pthread_mutex_unlock(l);

	if (err != 0)
		log_exit("%s - returned %d\n", strerror(err), err);
}
120
121 static inline void store_lock(void)
122 {
123 mutex_lock(&pidns_store_mutex);
124 }
125
126 static inline void store_unlock(void)
127 {
128 mutex_unlock(&pidns_store_mutex);
129 }
130
/*
 * Buffer size for a "/proc/<pid>" path:
 *
 *   "/proc/"      STRLITERALLEN("/proc/")
 *   <pid-as-str>  INTTYPE_TO_STRLEN(uint64_t)  (sized for a 64-bit value)
 *   '\0'          1
 */
#define LXCFS_PROC_PID_LEN \
	(STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + 1)
139
140 static int initpid_still_valid_pidfd(struct pidns_init_store *entry)
141 {
142 int ret;
143
144 if (entry->init_pidfd < 0)
145 return ret_errno(ENOSYS);
146
147 ret = pidfd_send_signal(entry->init_pidfd, 0, NULL, 0);
148 if (ret < 0) {
149 if (errno == ENOSYS)
150 return ret_errno(ENOSYS);
151
152 return 0;
153 }
154
155 return 1;
156 }
157
158 static int initpid_still_valid_stat(struct pidns_init_store *entry)
159 {
160 struct stat st;
161 char path[LXCFS_PROC_PID_LEN];
162
163 snprintf(path, sizeof(path), "/proc/%d", entry->initpid);
164 if (stat(path, &st) || entry->ctime != st.st_ctime)
165 return 0;
166
167 return 1;
168 }
169
/*
 * Decide whether @entry still describes a live init process. The pidfd
 * probe is authoritative when it yields an answer; a negative result from
 * it means "unavailable" and the stat()-based check is used instead.
 * Must be called under store_lock.
 */
static bool initpid_still_valid(struct pidns_init_store *entry)
{
	int verdict = initpid_still_valid_pidfd(entry);

	if (verdict >= 0)
		return verdict == 1;

	return initpid_still_valid_stat(entry) == 1;
}
181
182 /* Must be called under store_lock */
183 static void remove_initpid(struct pidns_init_store *entry)
184 {
185 struct pidns_init_store *it;
186 int ino_hash;
187
188 lxcfs_debug("Removing cached entry for pid %d from init pid cache",
189 entry->initpid);
190
191 ino_hash = HASH(entry->ino);
192 if (pidns_hash_table[ino_hash] == entry) {
193 pidns_hash_table[ino_hash] = entry->next;
194 close_prot_errno_disarm(entry->init_pidfd);
195 free_disarm(entry);
196 return;
197 }
198
199 it = pidns_hash_table[ino_hash];
200 while (it) {
201 if (it->next == entry) {
202 it->next = entry->next;
203 close_prot_errno_disarm(entry->init_pidfd);
204 free_disarm(entry);
205 return;
206 }
207 it = it->next;
208 }
209 }
210
211 #define PURGE_SECS 5
212 /* Must be called under store_lock */
213 static void prune_initpid_store(void)
214 {
215 static int64_t last_prune = 0;
216 int64_t now, threshold;
217
218 if (!last_prune) {
219 last_prune = time(NULL);
220 return;
221 }
222
223 now = time(NULL);
224 if (now < (last_prune + PURGE_SECS))
225 return;
226
227 lxcfs_debug("Pruning init pid cache");
228
229 last_prune = now;
230 threshold = now - 2 * PURGE_SECS;
231
232 for (int i = 0; i < PIDNS_HASH_SIZE; i++) {
233 for (struct pidns_init_store *entry = pidns_hash_table[i], *prev = NULL; entry;) {
234 if (entry->lastcheck < threshold) {
235 struct pidns_init_store *cur = entry;
236
237 lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid);
238
239 if (prev)
240 prev->next = entry->next;
241 else
242 pidns_hash_table[i] = entry->next;
243 entry = entry->next;
244 close_prot_errno_disarm(cur->init_pidfd);
245 free_disarm(cur);
246 } else {
247 prev = entry;
248 entry = entry->next;
249 }
250 }
251 }
252 }
253
254 /* Must be called under store_lock */
255 static void save_initpid(ino_t pidns_inode, pid_t pid)
256 {
257 __do_free struct pidns_init_store *entry = NULL;
258 __do_close int pidfd = -EBADF;
259 const struct lxcfs_opts *opts = fuse_get_context()->private_data;
260 char path[LXCFS_PROC_PID_LEN];
261 struct stat st;
262 int ino_hash;
263
264 if (opts && opts->use_pidfd && can_use_pidfd) {
265 pidfd = pidfd_open(pid, 0);
266 if (pidfd < 0)
267 return;
268 }
269
270 snprintf(path, sizeof(path), "/proc/%d", pid);
271 if (stat(path, &st))
272 return;
273
274 entry = zalloc(sizeof(*entry));
275 if (!entry)
276 return;
277
278 ino_hash = HASH(pidns_inode);
279 *entry = (struct pidns_init_store){
280 .ino = pidns_inode,
281 .initpid = pid,
282 .ctime = st.st_ctime,
283 .next = pidns_hash_table[ino_hash],
284 .lastcheck = time(NULL),
285 .init_pidfd = move_fd(pidfd),
286 };
287 pidns_hash_table[ino_hash] = move_ptr(entry);
288
289 lxcfs_debug("Added cache entry %d for pid %d to init pid cache", ino_hash, pid);
290 }
291
292 /*
293 * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store
294 * entry for the inode number and creation time. Verify that the init pid
295 * is still valid. If not, remove it. Return the entry if valid, NULL
296 * otherwise.
297 * Must be called under store_lock
298 */
299 static pid_t lookup_verify_initpid(ino_t pidns_inode)
300 {
301 struct pidns_init_store *entry = pidns_hash_table[HASH(pidns_inode)];
302
303 while (entry) {
304 if (entry->ino == pidns_inode) {
305 if (initpid_still_valid(entry)) {
306 entry->lastcheck = time(NULL);
307 return entry->initpid;
308 }
309
310 remove_initpid(entry);
311 return ret_errno(ESRCH);
312 }
313 entry = entry->next;
314 }
315
316 return ret_errno(ESRCH);
317 }
318
319 static int send_creds_clone_wrapper(void *arg)
320 {
321 int sock = PTR_TO_INT(arg);
322 char v = '1'; /* we are the child */
323 struct ucred cred = {
324 .uid = 0,
325 .gid = 0,
326 .pid = 1,
327 };
328
329 return send_creds(sock, &cred, v, true) != SEND_CREDS_OK;
330 }
331
332 /*
333 * Let's use the "standard stack limit" (i.e. glibc thread size default) for
334 * stack sizes: 8MB.
335 */
336 #define __LXCFS_STACK_SIZE (8 * 1024 * 1024)
337 pid_t lxcfs_clone(int (*fn)(void *), void *arg, int flags)
338 {
339 pid_t ret;
340 void *stack;
341
342 stack = malloc(__LXCFS_STACK_SIZE);
343 if (!stack)
344 return ret_errno(ENOMEM);
345
346 #ifdef __ia64__
347 ret = __clone2(fn, stack, __LXCFS_STACK_SIZE, flags | SIGCHLD, arg, NULL);
348 #else
349 ret = clone(fn, stack + __LXCFS_STACK_SIZE, flags | SIGCHLD, arg, NULL);
350 #endif
351 return ret;
352 }
353
/*
 * Buffer size for a "/proc/<pid>/ns/pid" path: both literal parts, room for
 * a 64-bit number printed as text, and the terminating '\0'.
 */
#define LXCFS_PROC_PID_NS_LEN                      \
	(STRLITERALLEN("/proc/") +                 \
	 INTTYPE_TO_STRLEN(uint64_t) +             \
	 STRLITERALLEN("/ns/pid") + 1)
357
358 /*
359 * clone a task which switches to @task's namespace and writes '1'.
360 * over a unix sock so we can read the task's reaper's pid in our
361 * namespace
362 *
363 * Note: glibc's fork() does not respect pidns, which can lead to failed
364 * assertions inside glibc (and thus failed forks) if the child's pid in
365 * the pidns and the parent pid outside are identical. Using clone prevents
366 * this issue.
367 */
368 static void write_task_init_pid_exit(int sock, pid_t target)
369 {
370 __do_close int fd = -EBADF;
371 char path[LXCFS_PROC_PID_NS_LEN];
372 pid_t pid;
373
374 snprintf(path, sizeof(path), "/proc/%d/ns/pid", (int)target);
375 fd = open(path, O_RDONLY | O_CLOEXEC);
376 if (fd < 0)
377 log_exit("write_task_init_pid_exit open of ns/pid");
378
379 if (setns(fd, 0))
380 log_exit("Failed to setns to pid namespace of process %d", target);
381
382 pid = lxcfs_clone(send_creds_clone_wrapper, INT_TO_PTR(sock), 0);
383 if (pid < 0)
384 _exit(EXIT_FAILURE);
385
386 if (pid != 0) {
387 if (!wait_for_pid(pid))
388 _exit(EXIT_FAILURE);
389
390 _exit(EXIT_SUCCESS);
391 }
392 }
393
394 static pid_t scm_init_pid(pid_t task)
395 {
396 char v = '0';
397 pid_t pid_ret = -1;
398 struct ucred cred = {
399 .pid = -1,
400 .uid = -1,
401 .gid = -1,
402 };
403 pid_t pid;
404 int sock[2];
405
406 if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sock) < 0)
407 return -1;
408
409 pid = fork();
410 if (pid < 0)
411 goto out;
412
413 if (pid == 0) {
414 close(sock[1]);
415 write_task_init_pid_exit(sock[0], task);
416 _exit(EXIT_SUCCESS);
417 }
418
419 if (!recv_creds(sock[1], &cred, &v))
420 goto out;
421
422 pid_ret = cred.pid;
423
424 out:
425 close(sock[0]);
426 close(sock[1]);
427 if (pid > 0)
428 wait_for_pid(pid);
429
430 return pid_ret;
431 }
432
433 pid_t lookup_initpid_in_store(pid_t pid)
434 {
435 pid_t hashed_pid = 0;
436 char path[LXCFS_PROC_PID_NS_LEN];
437 struct stat st;
438
439 snprintf(path, sizeof(path), "/proc/%d/ns/pid", pid);
440 if (stat(path, &st))
441 return ret_errno(ESRCH);
442
443 store_lock();
444
445 hashed_pid = lookup_verify_initpid(st.st_ino);
446 if (hashed_pid < 0) {
447 /* release the mutex as the following call is expensive */
448 store_unlock();
449
450 hashed_pid = scm_init_pid(pid);
451
452 store_lock();
453
454 if (hashed_pid > 0)
455 save_initpid(st.st_ino, hashed_pid);
456 }
457
458 /*
459 * Prune at the end in case we're pruning the value
460 * we were about to return.
461 */
462 prune_initpid_store();
463 store_unlock();
464
465 return hashed_pid;
466 }
467
468 /*
469 * Functions needed to setup cgroups in the __constructor__.
470 */
471
472 static bool umount_if_mounted(void)
473 {
474 if (umount2(BASEDIR, MNT_DETACH) < 0 && errno != EINVAL) {
475 lxcfs_error("Failed to unmount %s: %s.\n", BASEDIR, strerror(errno));
476 return false;
477 }
478 return true;
479 }
480
/*
 * The type of statfs.f_type differs between platforms, so derive it from the
 * struct itself. __typeof__ should be safe to use with all compilers.
 */
typedef __typeof__(((struct statfs *)NULL)->f_type) fs_type_magic;

/* Return true when the filesystem described by @fs has magic @magic_val. */
static bool has_fs_type(const struct statfs *fs, fs_type_magic magic_val)
{
	const fs_type_magic actual = fs->f_type;

	return actual == magic_val;
}
487
488 /*
489 * looking at fs/proc_namespace.c, it appears we can
490 * actually expect the rootfs entry to very specifically contain
491 * " - rootfs rootfs "
492 * IIUC, so long as we've chrooted so that rootfs is not our root,
493 * the rootfs entry should always be skipped in mountinfo contents.
494 */
495 static bool is_on_ramfs(void)
496 {
497 __do_free char *line = NULL;
498 __do_free void *fopen_cache = NULL;
499 __do_fclose FILE *f = NULL;
500 size_t len = 0;
501
502 f = fopen_cached("/proc/self/mountinfo", "re", &fopen_cache);
503 if (!f)
504 return false;
505
506 while (getline(&line, &len, f) != -1) {
507 int i;
508 char *p, *p2;
509
510 for (p = line, i = 0; p && i < 4; i++)
511 p = strchr(p + 1, ' ');
512 if (!p)
513 continue;
514
515 p2 = strchr(p + 1, ' ');
516 if (!p2)
517 continue;
518 *p2 = '\0';
519 if (strcmp(p + 1, "/") == 0) {
520 /* This is '/'. Is it the ramfs? */
521 p = strchr(p2 + 1, '-');
522 if (p && strncmp(p, "- rootfs rootfs ", 16) == 0)
523 return true;
524 }
525 }
526
527 return false;
528 }
529
530 static int pivot_enter()
531 {
532 __do_close int oldroot = -EBADF, newroot = -EBADF;
533
534 oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
535 if (oldroot < 0)
536 return log_error_errno(-1, errno,
537 "Failed to open old root for fchdir");
538
539 newroot = open(ROOTDIR, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
540 if (newroot < 0)
541 return log_error_errno(-1, errno,
542 "Failed to open new root for fchdir");
543
544 /* change into new root fs */
545 if (fchdir(newroot) < 0)
546 return log_error_errno(-1,
547 errno, "Failed to change directory to new rootfs: %s",
548 ROOTDIR);
549
550 /* pivot_root into our new root fs */
551 if (pivot_root(".", ".") < 0)
552 return log_error_errno(-1, errno,
553 "pivot_root() syscall failed: %s",
554 strerror(errno));
555
556 /*
557 * At this point the old-root is mounted on top of our new-root.
558 * To unmounted it we must not be chdir'd into it, so escape back
559 * to the old-root.
560 */
561 if (fchdir(oldroot) < 0)
562 return log_error_errno(-1, errno, "Failed to enter old root");
563
564 if (umount2(".", MNT_DETACH) < 0)
565 return log_error_errno(-1, errno, "Failed to detach old root");
566
567 if (fchdir(newroot) < 0)
568 return log_error_errno(-1, errno, "Failed to re-enter new root");
569
570 return 0;
571 }
572
573 static int chroot_enter()
574 {
575 if (mount(ROOTDIR, "/", NULL, MS_REC | MS_BIND, NULL)) {
576 lxcfs_error("Failed to recursively bind-mount %s into /.", ROOTDIR);
577 return -1;
578 }
579
580 if (chroot(".") < 0) {
581 lxcfs_error("Call to chroot() failed: %s.\n", strerror(errno));
582 return -1;
583 }
584
585 if (chdir("/") < 0) {
586 lxcfs_error("Failed to change directory: %s.\n", strerror(errno));
587 return -1;
588 }
589
590 return 0;
591 }
592
/*
 * Enter the prepared root: chroot when "/" is a ramfs, pivot_root otherwise.
 *
 * Returns 0 on success and -1 on failure (the chroot path returns whatever
 * chroot_enter() returns).
 */
static int permute_and_enter(void)
{
	struct statfs root_fs;

	if (statfs("/", &root_fs) < 0) {
		lxcfs_error("%s\n", "Could not stat / mountpoint.");
		return -1;
	}

	/*
	 * has_fs_type() is not reliable: when the ramfs is a tmpfs it will
	 * likely report TMPFS_MAGIC. So if it says no, the mountinfo based
	 * check in is_on_ramfs() is consulted as well.
	 */
	if (has_fs_type(&root_fs, RAMFS_MAGIC) || is_on_ramfs())
		return chroot_enter();

	if (pivot_enter() >= 0)
		return 0;

	lxcfs_error("%s\n", "Could not perform pivot root.");
	return -1;
}
615
616 /* Prepare our new clean root. */
617 static int permute_prepare(void)
618 {
619 if (mkdir(ROOTDIR, 0700) < 0 && errno != EEXIST) {
620 lxcfs_error("%s\n", "Failed to create directory for new root.");
621 return -1;
622 }
623
624 if (mount("/", ROOTDIR, NULL, MS_BIND, 0) < 0) {
625 lxcfs_error("Failed to bind-mount / for new root: %s.\n", strerror(errno));
626 return -1;
627 }
628
629 if (mount(RUNTIME_PATH, ROOTDIR RUNTIME_PATH, NULL, MS_BIND, 0) < 0) {
630 lxcfs_error("Failed to bind-mount /run into new root: %s.\n", strerror(errno));
631 return -1;
632 }
633
634 if (mount(BASEDIR, ROOTDIR BASEDIR, NULL, MS_REC | MS_MOVE, 0) < 0) {
635 printf("Failed to move " BASEDIR " into new root: %s.\n", strerror(errno));
636 return -1;
637 }
638
639 return 0;
640 }
641
/*
 * Prepare the new root and then enter it (chroot() on ramfs, pivot_root()
 * in all other cases). Entering is only attempted when preparation worked.
 */
static bool permute_root(void)
{
	return permute_prepare() >= 0 && permute_and_enter() >= 0;
}
655
656 static bool cgfs_prepare_mounts(void)
657 {
658 if (!mkdir_p(BASEDIR, 0700)) {
659 lxcfs_error("%s\n", "Failed to create lxcfs cgroup mountpoint.");
660 return false;
661 }
662
663 if (!umount_if_mounted()) {
664 lxcfs_error("%s\n", "Failed to clean up old lxcfs cgroup mountpoint.");
665 return false;
666 }
667
668 if (unshare(CLONE_NEWNS) < 0) {
669 lxcfs_error("Failed to unshare mount namespace: %s.\n", strerror(errno));
670 return false;
671 }
672
673 cgroup_ops->mntns_fd = preserve_ns(getpid(), "mnt");
674 if (cgroup_ops->mntns_fd < 0) {
675 lxcfs_error("Failed to preserve mount namespace: %s.\n", strerror(errno));
676 return false;
677 }
678
679 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0) < 0) {
680 lxcfs_error("Failed to remount / private: %s.\n", strerror(errno));
681 return false;
682 }
683
684 if (mount("tmpfs", BASEDIR, "tmpfs", 0, "size=100000,mode=700") < 0) {
685 lxcfs_error("%s\n", "Failed to mount tmpfs over lxcfs cgroup mountpoint.");
686 return false;
687 }
688
689 return true;
690 }
691
692 static bool cgfs_mount_hierarchies(void)
693 {
694 if (!mkdir_p(BASEDIR DEFAULT_CGROUP_MOUNTPOINT, 0755))
695 return false;
696
697 if (!cgroup_ops->mount(cgroup_ops, BASEDIR))
698 return false;
699
700 for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++) {
701 __do_free char *path = must_make_path(BASEDIR, (*h)->mountpoint, NULL);
702 (*h)->fd = open(path, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW);
703 if ((*h)->fd < 0)
704 return false;
705 }
706
707 return true;
708 }
709
/*
 * Build the private cgroup mounts for lxcfs: prepare the mount namespace,
 * mount the hierarchies, then switch into the new root. Each step runs only
 * if the previous one succeeded; a hierarchy mount failure is additionally
 * logged with the current errno.
 */
static bool cgfs_setup_controllers(void)
{
	if (!cgfs_prepare_mounts())
		return false;

	if (!cgfs_mount_hierarchies()) {
		return log_error_errno(false, errno,
				       "Failed to set up private lxcfs cgroup mounts");
	}

	return permute_root();
}
723
724 static void sigusr2_toggle_virtualization(int signo, siginfo_t *info, void *extra)
725 {
726 int ret;
727
728 if (reload_successful) {
729 reload_successful = 0;
730
731 /* write() is async signal safe */
732 ret = write(STDERR_FILENO,
733 "Switched into non-virtualization mode\n",
734 STRLITERALLEN("Switched into non-virtualization mode\n"));
735 if (ret < 0)
736 goto please_compiler;
737 } else {
738 reload_successful = 1;
739
740 /* write() is async signal safe */
741 ret = write(STDERR_FILENO, "Switched into virtualization mode\n",
742 STRLITERALLEN("Switched into virtualization mode\n"));
743 if (ret < 0)
744 goto please_compiler;
745 }
746
747 please_compiler:
748 /*
749 * The write() syscall is a function whose return value needs to be
750 * checked. Otherwise the compiler will warn. This is how we
751 * please our master. Another one could be to use
752 * syscall(__NR_write, ...) directly but whatever.
753 */
754 return;
755 }
756
757 static void __attribute__((constructor)) lxcfs_init(void)
758 {
759 __do_close int init_ns = -EBADF, root_fd = -EBADF,
760 pidfd = -EBADF;
761 int i = 0;
762 pid_t pid;
763
764 lxcfs_info("Running constructor %s to reload liblxcfs", __func__);
765
766 cgroup_ops = cgroup_init();
767 if (!cgroup_ops) {
768 lxcfs_info("Failed to initialize cgroup support");
769 goto broken_upgrade;
770 }
771
772 /* Preserve initial namespace. */
773 pid = getpid();
774 init_ns = preserve_ns(pid, "mnt");
775 if (init_ns < 0) {
776 lxcfs_info("Failed to preserve initial mount namespace");
777 goto broken_upgrade;
778 }
779
780 /* This function calls unshare(CLONE_NEWNS) our initial mount namespace
781 * to privately mount lxcfs cgroups. */
782 if (!cgfs_setup_controllers()) {
783 log_exit("Failed to setup private cgroup mounts for lxcfs");
784 goto broken_upgrade;
785 }
786
787 if (setns(init_ns, 0) < 0) {
788 log_exit("%s - Failed to switch back to initial mount namespace", strerror(errno));
789 goto broken_upgrade;
790 }
791
792 if (!init_cpuview()) {
793 log_exit("Failed to init CPU view");
794 goto broken_upgrade;
795 }
796
797 lxcfs_info("mount namespace: %d", cgroup_ops->mntns_fd);
798 lxcfs_info("hierarchies:");
799
800 for (struct hierarchy **h = cgroup_ops->hierarchies; h && *h; h++, i++) {
801 char **controller_list = (*h)->controllers;
802 __do_free char *controllers = NULL;
803 if (controller_list && *controller_list)
804 controllers = lxc_string_join(",", (const char **)controller_list, false);
805 lxcfs_info(" %2d: fd: %3d: %s", i, (*h)->fd, controllers ?: "");
806 }
807
808 pidfd = pidfd_open(pid, 0);
809 if (pidfd >= 0 && pidfd_send_signal(pidfd, 0, NULL, 0) == 0) {
810 can_use_pidfd = true;
811 lxcfs_info("Kernel supports pidfds");
812 }
813
814 lxcfs_info("api_extensions:");
815 for (i = 0; i < nr_api_extensions; i++)
816 lxcfs_info("- %s", api_extensions[i]);
817
818 root_fd = open("/", O_PATH | O_CLOEXEC);
819 if (root_fd < 0)
820 lxcfs_info("%s - Failed to open root directory", strerror(errno));
821 else if (fchdir(root_fd) < 0)
822 lxcfs_info("%s - Failed to change to root directory", strerror(errno));
823
824 if (install_signal_handler(SIGUSR2, sigusr2_toggle_virtualization)) {
825 lxcfs_info("%s - Failed to install SIGUSR2 signal handler", strerror(errno));
826 goto broken_upgrade;
827 }
828
829 reload_successful = 1;
830 return;
831
832 broken_upgrade:
833 reload_successful = 0;
834 lxcfs_info("Failed to run constructor %s to reload liblxcfs", __func__);
835 }
836
837 static void __attribute__((destructor)) lxcfs_exit(void)
838 {
839 lxcfs_info("Running destructor %s", __func__);
840
841 free_cpuview();
842 cgroup_exit(cgroup_ops);
843 }