git.proxmox.com Git - systemd.git/blob - src/core/mount-setup.c
Imported Upstream version 220
[systemd.git] / src / core / mount-setup.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2010 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/mount.h>
23 #include <errno.h>
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <ftw.h>
27
28 #include "mount-setup.h"
29 #include "dev-setup.h"
30 #include "log.h"
31 #include "macro.h"
32 #include "util.h"
33 #include "label.h"
34 #include "set.h"
35 #include "strv.h"
36 #include "mkdir.h"
37 #include "path-util.h"
38 #include "missing.h"
39 #include "virt.h"
40 #include "efivars.h"
41 #include "smack-util.h"
42 #include "cgroup-util.h"
43
/* Per-entry behaviour flags for a MountPoint; values are bit flags and
 * may be OR-ed together. */
typedef enum MountMode {
        /* No special handling. */
        MNT_NONE         = 0,

        /* A failure to mount this entry is logged as an error and
         * reported to the caller instead of being silently skipped. */
        MNT_FATAL        = 1 << 0,

        /* The entry is mounted even when a container is detected;
         * entries without this flag are skipped in containers. */
        MNT_IN_CONTAINER = 1 << 1,
} MountMode;
49
50 typedef struct MountPoint {
51 const char *what;
52 const char *where;
53 const char *type;
54 const char *options;
55 unsigned long flags;
56 bool (*condition_fn)(void);
57 MountMode mode;
58 } MountPoint;
59
/* Number of leading mount_table entries that are mounted early. The
 * first three may be needed before SELinux is up, and the fourth
 * (securityfs) is needed by IMA to load a custom policy; everything
 * after that can wait until SELinux and IMA are loaded. With SMACK
 * support compiled in, smackfs is needed early as well, which makes
 * it five. */
#ifndef HAVE_SMACK
#define N_EARLY_MOUNT 4
#else
#define N_EARLY_MOUNT 5
#endif
69
70 static const MountPoint mount_table[] = {
71 { "sysfs", "/sys", "sysfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
72 NULL, MNT_FATAL|MNT_IN_CONTAINER },
73 { "proc", "/proc", "proc", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
74 NULL, MNT_FATAL|MNT_IN_CONTAINER },
75 { "devtmpfs", "/dev", "devtmpfs", "mode=755", MS_NOSUID|MS_STRICTATIME,
76 NULL, MNT_FATAL|MNT_IN_CONTAINER },
77 { "securityfs", "/sys/kernel/security", "securityfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
78 NULL, MNT_NONE },
79 #ifdef HAVE_SMACK
80 { "smackfs", "/sys/fs/smackfs", "smackfs", "smackfsdef=*", MS_NOSUID|MS_NOEXEC|MS_NODEV,
81 mac_smack_use, MNT_FATAL },
82 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
83 mac_smack_use, MNT_FATAL },
84 #endif
85 { "tmpfs", "/dev/shm", "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
86 NULL, MNT_FATAL|MNT_IN_CONTAINER },
87 { "devpts", "/dev/pts", "devpts", "mode=620,gid=" STRINGIFY(TTY_GID), MS_NOSUID|MS_NOEXEC,
88 NULL, MNT_IN_CONTAINER },
89 #ifdef HAVE_SMACK
90 { "tmpfs", "/run", "tmpfs", "mode=755,smackfsroot=*", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
91 mac_smack_use, MNT_FATAL },
92 #endif
93 { "tmpfs", "/run", "tmpfs", "mode=755", MS_NOSUID|MS_NODEV|MS_STRICTATIME,
94 NULL, MNT_FATAL|MNT_IN_CONTAINER },
95 { "tmpfs", "/sys/fs/cgroup", "tmpfs", "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME,
96 NULL, MNT_FATAL|MNT_IN_CONTAINER },
97 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd,xattr", MS_NOSUID|MS_NOEXEC|MS_NODEV,
98 NULL, MNT_IN_CONTAINER },
99 { "cgroup", "/sys/fs/cgroup/systemd", "cgroup", "none,name=systemd", MS_NOSUID|MS_NOEXEC|MS_NODEV,
100 NULL, MNT_FATAL|MNT_IN_CONTAINER },
101 { "pstore", "/sys/fs/pstore", "pstore", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
102 NULL, MNT_NONE },
103 #ifdef ENABLE_EFI
104 { "efivarfs", "/sys/firmware/efi/efivars", "efivarfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
105 is_efi_boot, MNT_NONE },
106 #endif
107 #ifdef ENABLE_KDBUS
108 { "kdbusfs", "/sys/fs/kdbus", "kdbusfs", NULL, MS_NOSUID|MS_NOEXEC|MS_NODEV,
109 NULL, MNT_IN_CONTAINER },
110 #endif
111 };
112
/* API file systems that other software might mount. They are listed
 * here only so that we know to ignore them.
 *
 * The list is a sequence of NUL-terminated paths placed back to back;
 * the implicit terminator of the literal ends the list. */
static const char ignore_paths[] =
        "/sys/fs/selinux\0"     /* SELinux file systems */
        "/proc/sys\0"           /* Container bind mounts (this and the following) */
        "/dev/console\0"
        "/proc/kmsg\0";
123
124 bool mount_point_is_api(const char *path) {
125 unsigned i;
126
127 /* Checks if this mount point is considered "API", and hence
128 * should be ignored */
129
130 for (i = 0; i < ELEMENTSOF(mount_table); i ++)
131 if (path_equal(path, mount_table[i].where))
132 return true;
133
134 return path_startswith(path, "/sys/fs/cgroup/");
135 }
136
137 bool mount_point_ignore(const char *path) {
138 const char *i;
139
140 NULSTR_FOREACH(i, ignore_paths)
141 if (path_equal(path, i))
142 return true;
143
144 return false;
145 }
146
147 static int mount_one(const MountPoint *p, bool relabel) {
148 int r;
149
150 assert(p);
151
152 if (p->condition_fn && !p->condition_fn())
153 return 0;
154
155 /* Relabel first, just in case */
156 if (relabel)
157 label_fix(p->where, true, true);
158
159 r = path_is_mount_point(p->where, true);
160 if (r < 0 && r != -ENOENT)
161 return r;
162 if (r > 0)
163 return 0;
164
165 /* Skip securityfs in a container */
166 if (!(p->mode & MNT_IN_CONTAINER) && detect_container(NULL) > 0)
167 return 0;
168
169 /* The access mode here doesn't really matter too much, since
170 * the mounted file system will take precedence anyway. */
171 if (relabel)
172 mkdir_p_label(p->where, 0755);
173 else
174 mkdir_p(p->where, 0755);
175
176 log_debug("Mounting %s to %s of type %s with options %s.",
177 p->what,
178 p->where,
179 p->type,
180 strna(p->options));
181
182 if (mount(p->what,
183 p->where,
184 p->type,
185 p->flags,
186 p->options) < 0) {
187 log_full((p->mode & MNT_FATAL) ? LOG_ERR : LOG_DEBUG, "Failed to mount %s at %s: %m", p->type, p->where);
188 return (p->mode & MNT_FATAL) ? -errno : 0;
189 }
190
191 /* Relabel again, since we now mounted something fresh here */
192 if (relabel)
193 label_fix(p->where, false, false);
194
195 return 1;
196 }
197
198 int mount_setup_early(void) {
199 unsigned i;
200 int r = 0;
201
202 assert_cc(N_EARLY_MOUNT <= ELEMENTSOF(mount_table));
203
204 /* Do a minimal mount of /proc and friends to enable the most
205 * basic stuff, such as SELinux */
206 for (i = 0; i < N_EARLY_MOUNT; i ++) {
207 int j;
208
209 j = mount_one(mount_table + i, false);
210 if (r == 0)
211 r = j;
212 }
213
214 return r;
215 }
216
217 int mount_cgroup_controllers(char ***join_controllers) {
218 _cleanup_set_free_free_ Set *controllers = NULL;
219 int r;
220
221 /* Mount all available cgroup controllers that are built into the kernel. */
222
223 controllers = set_new(&string_hash_ops);
224 if (!controllers)
225 return log_oom();
226
227 r = cg_kernel_controllers(controllers);
228 if (r < 0)
229 return log_error_errno(r, "Failed to enumerate cgroup controllers: %m");
230
231 for (;;) {
232 _cleanup_free_ char *options = NULL, *controller = NULL, *where = NULL;
233 MountPoint p = {
234 .what = "cgroup",
235 .type = "cgroup",
236 .flags = MS_NOSUID|MS_NOEXEC|MS_NODEV,
237 .mode = MNT_IN_CONTAINER,
238 };
239 char ***k = NULL;
240
241 controller = set_steal_first(controllers);
242 if (!controller)
243 break;
244
245 if (join_controllers)
246 for (k = join_controllers; *k; k++)
247 if (strv_find(*k, controller))
248 break;
249
250 if (k && *k) {
251 char **i, **j;
252
253 for (i = *k, j = *k; *i; i++) {
254
255 if (!streq(*i, controller)) {
256 _cleanup_free_ char *t;
257
258 t = set_remove(controllers, *i);
259 if (!t) {
260 free(*i);
261 continue;
262 }
263 }
264
265 *(j++) = *i;
266 }
267
268 *j = NULL;
269
270 options = strv_join(*k, ",");
271 if (!options)
272 return log_oom();
273 } else {
274 options = controller;
275 controller = NULL;
276 }
277
278 where = strappend("/sys/fs/cgroup/", options);
279 if (!where)
280 return log_oom();
281
282 p.where = where;
283 p.options = options;
284
285 r = mount_one(&p, true);
286 if (r < 0)
287 return r;
288
289 if (r > 0 && k && *k) {
290 char **i;
291
292 for (i = *k; *i; i++) {
293 _cleanup_free_ char *t = NULL;
294
295 t = strappend("/sys/fs/cgroup/", *i);
296 if (!t)
297 return log_oom();
298
299 r = symlink(options, t);
300 if (r < 0 && errno != EEXIST)
301 return log_error_errno(errno, "Failed to create symlink %s: %m", t);
302 }
303 }
304 }
305
306 /* Now that we mounted everything, let's make the tmpfs the
307 * cgroup file systems are mounted into read-only. */
308 (void) mount("tmpfs", "/sys/fs/cgroup", "tmpfs", MS_REMOUNT|MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_STRICTATIME|MS_RDONLY, "mode=755");
309
310 return 0;
311 }
312
#if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
/* nftw() callback that relabels every visited entry via label_fix().
 *
 * The root of the walk (level 0) is left alone, and the walk does not
 * descend into /run/initramfs. Returns FTW_CONTINUE or
 * FTW_SKIP_SUBTREE, so the walk has to be started with
 * FTW_ACTIONRETVAL. The sb argument is unused. */
static int nftw_cb(
                const char *fpath,
                const struct stat *sb,
                int tflag,
                struct FTW *ftwbuf) {

        /* No need to label /dev twice in a row... */
        if (_unlikely_(ftwbuf->level == 0))
                return FTW_CONTINUE;

        label_fix(fpath, false, false);

        /* /run/initramfs is static data and big, no need to
         * dynamically relabel its contents at boot... */
        if (_unlikely_(ftwbuf->level == 1 &&
                       tflag == FTW_D &&
                       streq(fpath, "/run/initramfs")))
                return FTW_SKIP_SUBTREE;

        return FTW_CONTINUE;
}
#endif
336
337 int mount_setup(bool loaded_policy) {
338 unsigned i;
339 int r = 0;
340
341 for (i = 0; i < ELEMENTSOF(mount_table); i ++) {
342 int j;
343
344 j = mount_one(mount_table + i, loaded_policy);
345 if (r == 0)
346 r = j;
347 }
348
349 if (r < 0)
350 return r;
351
352 #if defined(HAVE_SELINUX) || defined(HAVE_SMACK)
353 /* Nodes in devtmpfs and /run need to be manually updated for
354 * the appropriate labels, after mounting. The other virtual
355 * API file systems like /sys and /proc do not need that, they
356 * use the same label for all their files. */
357 if (loaded_policy) {
358 usec_t before_relabel, after_relabel;
359 char timespan[FORMAT_TIMESPAN_MAX];
360
361 before_relabel = now(CLOCK_MONOTONIC);
362
363 nftw("/dev", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
364 nftw("/run", nftw_cb, 64, FTW_MOUNT|FTW_PHYS|FTW_ACTIONRETVAL);
365
366 after_relabel = now(CLOCK_MONOTONIC);
367
368 log_info("Relabelled /dev and /run in %s.",
369 format_timespan(timespan, sizeof(timespan), after_relabel - before_relabel, 0));
370 }
371 #endif
372
373 /* Create a few default symlinks, which are normally created
374 * by udevd, but some scripts might need them before we start
375 * udevd. */
376 dev_setup(NULL, UID_INVALID, GID_INVALID);
377
378 /* Mark the root directory as shared in regards to mount
379 * propagation. The kernel defaults to "private", but we think
380 * it makes more sense to have a default of "shared" so that
381 * nspawn and the container tools work out of the box. If
382 * specific setups need other settings they can reset the
383 * propagation mode to private if needed. */
384 if (detect_container(NULL) <= 0)
385 if (mount(NULL, "/", NULL, MS_REC|MS_SHARED, NULL) < 0)
386 log_warning_errno(errno, "Failed to set up the root directory for shared mount propagation: %m");
387
388 /* Create a few directories we always want around, Note that
389 * sd_booted() checks for /run/systemd/system, so this mkdir
390 * really needs to stay for good, otherwise software that
391 * copied sd-daemon.c into their sources will misdetect
392 * systemd. */
393 mkdir_label("/run/systemd", 0755);
394 mkdir_label("/run/systemd/system", 0755);
395 mkdir_label("/run/systemd/inaccessible", 0000);
396
397 return 0;
398 }