1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2010 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/mount.h>
24 #include <sys/statvfs.h>
26 #include "alloc-util.h"
30 #include "mount-util.h"
31 #include "parse-util.h"
32 #include "path-util.h"
34 #include "stdio-util.h"
35 #include "string-util.h"
/* Looks up the mount ID of a file through the "mnt_id:" field of its
 * /proc/self/fdinfo/<fd> entry and parses it into the integer that
 * mnt_id points to.
 *
 * If flags contains AT_EMPTY_PATH and filename is empty, fd itself is
 * inspected; otherwise filename is opened relative to fd with O_PATH
 * and the resulting descriptor is inspected instead.
 *
 * On the success path the result of safe_atoi() is returned. The caller
 * in this file tests for -EOPNOTSUPP, presumably the code used when the
 * fdinfo entry or the mnt_id field is unavailable; the lines that would
 * confirm this are not visible in this excerpt. */
38 static int fd_fdinfo_mnt_id(int fd
, const char *filename
, int flags
, int *mnt_id
) {
39 char path
[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
40 _cleanup_free_
char *fdinfo
= NULL
;
41 _cleanup_close_
int subfd
= -1;
/* Empty filename together with AT_EMPTY_PATH: inspect fd itself */
45 if ((flags
& AT_EMPTY_PATH
) && isempty(filename
))
46 xsprintf(path
, "/proc/self/fdinfo/%i", fd
);
/* Otherwise open the named entry with O_PATH and inspect that descriptor */
48 subfd
= openat(fd
, filename
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
|O_PATH
);
52 xsprintf(path
, "/proc/self/fdinfo/%i", subfd
);
55 r
= read_full_file(path
, &fdinfo
, NULL
);
56 if (r
== -ENOENT
) /* The fdinfo directory is a relatively new addition */
61 p
= startswith(fdinfo
, "mnt_id:");
63 p
= strstr(fdinfo
, "\nmnt_id:");
64 if (!p
) /* The mnt_id field is a relatively new addition */
/* Skip leading whitespace and cut at the next whitespace so that only
 * the number itself is handed to the parser */
70 p
+= strspn(p
, WHITESPACE
);
71 p
[strcspn(p
, WHITESPACE
)] = 0;
73 return safe_atoi(p
, mnt_id
);
/* Determines whether the entry filename, looked up relative to fd, is a
 * mount point. fd itself acts as the parent in every comparison: it is
 * queried with an empty name and AT_EMPTY_PATH.
 *
 * Three strategies are visible, in this order: mount IDs and file
 * handles from name_to_handle_at(), mount IDs from fd_fdinfo_mnt_id(),
 * and finally an fstatat() based device and inode comparison. The
 * visible return statements yield the result of a comparison; the error
 * paths are not visible in this excerpt. */
77 int fd_is_mount_point(int fd
, const char *filename
, int flags
) {
78 union file_handle_union h
= FILE_HANDLE_INIT
, h_parent
= FILE_HANDLE_INIT
;
79 int mount_id
= -1, mount_id_parent
= -1;
80 bool nosupp
= false, check_st_dev
= true;
87 /* First we will try the name_to_handle_at() syscall, which
88 * tells us the mount id and an opaque file "handle". It is
89 * not supported everywhere though (kernel compile-time
90 * option, not all file systems are hooked up). If it works
91 * the mount id is usually good enough to tell us whether
92 * something is a mount point.
94 * If that didn't work we will try to read the mount id from
95 * /proc/self/fdinfo/<fd>. This is almost as good as
96 * name_to_handle_at(), however, does not return the
97 * opaque file handle. The opaque file handle is pretty useful
98 * to detect the root directory, which we should always
99 * consider a mount point. Hence we use this only as
100 * fallback. Exporting the mnt_id in fdinfo is a pretty recent
103 * As last fallback we do traditional fstat() based st_dev
104 * comparisons. This is how things were traditionally done,
105 * but unionfs breaks this since it exposes file
106 * systems with a variety of st_dev reported. Also, btrfs
107 * subvolumes have different st_dev, even though they aren't
108 * real mounts of their own. */
110 r
= name_to_handle_at(fd
, filename
, &h
.handle
, &mount_id
, flags
);
113 /* This kernel does not support name_to_handle_at()
114 * fall back to simpler logic. */
115 goto fallback_fdinfo
;
116 else if (errno
== EOPNOTSUPP
)
117 /* This kernel or file system does not support
118 * name_to_handle_at(), hence let's see if the
119 * upper fs supports it (in which case it is a
120 * mount point), otherwise fallback to the
121 * traditional stat() logic */
127 r
= name_to_handle_at(fd
, "", &h_parent
.handle
, &mount_id_parent
, AT_EMPTY_PATH
);
129 if (errno
== EOPNOTSUPP
) {
131 /* Neither parent nor child do name_to_handle_at()?
132 We have no choice but to fall back. */
133 goto fallback_fdinfo
;
135 /* The parent can't do name_to_handle_at() but the
136 * directory we are interested in can?
137 * If so, it must be a mount point. */
143 /* The parent can do name_to_handle_at() but the
144 * directory we are interested in can't? If so, it
145 * must be a mount point. */
149 /* If the file handle for the directory we are
150 * interested in and its parent are identical, we
151 * assume this is the root directory, which is a mount
154 if (h
.handle
.handle_bytes
== h_parent
.handle
.handle_bytes
&&
155 h
.handle
.handle_type
== h_parent
.handle
.handle_type
&&
156 memcmp(h
.handle
.f_handle
, h_parent
.handle
.f_handle
, h
.handle
.handle_bytes
) == 0)
159 return mount_id
!= mount_id_parent
;
162 r
= fd_fdinfo_mnt_id(fd
, filename
, flags
, &mount_id
);
163 if (r
== -EOPNOTSUPP
)
168 r
= fd_fdinfo_mnt_id(fd
, "", AT_EMPTY_PATH
, &mount_id_parent
);
172 if (mount_id
!= mount_id_parent
)
175 /* Hmm, so, the mount ids are the same. This leaves one
176 * special case though for the root file system. For that,
177 * let's see if the parent directory has the same inode as we
178 * are interested in. Hence, let's also do fstat() checks now,
179 * too, but avoid the st_dev comparisons, since they aren't
180 * that useful on unionfs mounts. */
181 check_st_dev
= false;
184 /* yay for fstatat() taking a different set of flags than the other
186 if (flags
& AT_SYMLINK_FOLLOW
)
187 flags
&= ~AT_SYMLINK_FOLLOW
;
189 flags
|= AT_SYMLINK_NOFOLLOW
;
190 if (fstatat(fd
, filename
, &a
, flags
) < 0)
193 if (fstatat(fd
, "", &b
, AT_EMPTY_PATH
) < 0)
196 /* A directory with same device and inode as its parent? Must
197 * be the root directory */
198 if (a
.st_dev
== b
.st_dev
&&
199 a
.st_ino
== b
.st_ino
)
202 return check_st_dev
&& (a
.st_dev
!= b
.st_dev
);
205 /* Checks whether the path t is a mount point. flags can be
 * AT_SYMLINK_FOLLOW or 0.
 *
 * The path "/" is special-cased up front. With AT_SYMLINK_FOLLOW a
 * canonicalized form of the path is obtained first. The parent directory
 * is then opened with O_PATH and the last path component is handed to
 * fd_is_mount_point() together with flags. */
206 int path_is_mount_point(const char *t
, int flags
) {
207 _cleanup_close_
int fd
= -1;
208 _cleanup_free_
char *canonical
= NULL
, *parent
= NULL
;
212 if (path_equal(t
, "/"))
215 /* we need to resolve symlinks manually, we can't just rely on
216 * fd_is_mount_point() to do that for us; if we have a structure like
217 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
218 * look at needs to be /usr, not /. */
219 if (flags
& AT_SYMLINK_FOLLOW
) {
220 canonical
= canonicalize_file_name(t
);
227 parent
= dirname_malloc(t
);
231 fd
= openat(AT_FDCWD
, parent
, O_RDONLY
|O_NONBLOCK
|O_DIRECTORY
|O_CLOEXEC
|O_PATH
);
235 return fd_is_mount_point(fd
, basename(t
), flags
);
/* Unmounts the mount points listed in /proc/self/mountinfo whose
 * unescaped path starts with prefix, passing flags on to umount2().
 *
 * Only field (5) of each mountinfo line, the mount point, is captured;
 * every other field is skipped by the scan format. The surrounding loop
 * structure and the return value are not visible in this excerpt. */
238 int umount_recursive(const char *prefix
, int flags
) {
242 /* Try to umount everything recursively below a
243 * directory. Also, take care of stacked mounts, and keep
244 * unmounting them until they are gone. */
247 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
252 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
253 if (!proc_self_mountinfo
)
257 _cleanup_free_
char *path
= NULL
, *p
= NULL
;
260 k
= fscanf(proc_self_mountinfo
,
261 "%*s " /* (1) mount id */
262 "%*s " /* (2) parent id */
263 "%*s " /* (3) major:minor */
264 "%*s " /* (4) root */
265 "%ms " /* (5) mount point */
266 "%*s" /* (6) mount options */
267 "%*[^-]" /* (7) optional fields */
268 "- " /* (8) separator */
269 "%*s " /* (9) file system type */
270 "%*s" /* (10) mount source */
271 "%*s" /* (11) mount options 2 */
272 "%*[^\n]", /* some rubbish at the end */
281 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
285 if (!path_startswith(p
, prefix
))
288 if (umount2(p
, flags
) < 0) {
/* Queries statvfs() for path. Presumably the mount flags found there are
 * stored in the variable flags points to and an error is returned when
 * statvfs() fails; neither the assignment nor the return statements are
 * visible in this excerpt. */
304 static int get_mount_flags(const char *path
, unsigned long *flags
) {
307 if (statvfs(path
, &buf
) < 0)
/* Remounts prefix and the mounts found below it as bind mounts that are
 * read-only (ro is true) or read-write (ro is false). The comment at the
 * top of the body describes the strategy in detail.
 *
 * Mount points are read from /proc/self/mountinfo. Paths that were
 * already handled are kept in the "done" set, paths still to be handled
 * in the "todo" set. The outer loop and the return statements are not
 * visible in this excerpt. */
313 int bind_remount_recursive(const char *prefix
, bool ro
) {
314 _cleanup_set_free_free_ Set
*done
= NULL
;
315 _cleanup_free_
char *cleaned
= NULL
;
318 /* Recursively remount a directory (and all its submounts)
319 * read-only or read-write. If the directory is already
320 * mounted, we reuse the mount and simply mark it
321 * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
322 * operation). If it isn't we first make it one. Afterwards we
323 * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
324 * submounts we can access, too. When mounts are stacked on
325 * the same mount point we only care for each individual
326 * "top-level" mount on each point, as we cannot
327 * influence/access the underlying mounts anyway. We do not
328 * have any effect on future submounts that might get
329 * propagated, they might be writable. This includes future
330 * submounts that have been triggered via autofs. */
332 cleaned
= strdup(prefix
);
336 path_kill_slashes(cleaned
);
338 done
= set_new(&string_hash_ops
);
343 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
344 _cleanup_set_free_free_ Set
*todo
= NULL
;
345 bool top_autofs
= false;
347 unsigned long orig_flags
;
349 todo
= set_new(&string_hash_ops
);
353 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
354 if (!proc_self_mountinfo
)
358 _cleanup_free_
char *path
= NULL
, *p
= NULL
, *type
= NULL
;
361 k
= fscanf(proc_self_mountinfo
,
362 "%*s " /* (1) mount id */
363 "%*s " /* (2) parent id */
364 "%*s " /* (3) major:minor */
365 "%*s " /* (4) root */
366 "%ms " /* (5) mount point */
367 "%*s" /* (6) mount options (superblock) */
368 "%*[^-]" /* (7) optional fields */
369 "- " /* (8) separator */
370 "%ms " /* (9) file system type */
371 "%*s" /* (10) mount source */
372 "%*s" /* (11) mount options (bind mount) */
373 "%*[^\n]", /* some rubbish at the end */
383 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
387 /* Let's ignore autofs mounts. If they aren't
388 * triggered yet, we want to avoid triggering
389 * them, as we don't make any guarantees for
390 * future submounts anyway. If they are
391 * already triggered, then we will find
392 * another entry for this. */
393 if (streq(type
, "autofs")) {
394 top_autofs
= top_autofs
|| path_equal(cleaned
, p
);
398 if (path_startswith(p
, cleaned
) &&
399 !set_contains(done
, p
)) {
401 r
= set_consume(todo
, p
);
411 /* If we have no submounts to process anymore and if
412 * the root is either already done, or an autofs, we
414 if (set_isempty(todo
) &&
415 (top_autofs
|| set_contains(done
, cleaned
)))
418 if (!set_contains(done
, cleaned
) &&
419 !set_contains(todo
, cleaned
)) {
420 /* The prefix directory itself is not yet a
421 * mount, make it one. */
422 if (mount(cleaned
, cleaned
, NULL
, MS_BIND
|MS_REC
, NULL
) < 0)
/* NOTE(review): the result of get_mount_flags() is ignored here and
 * orig_flags has no visible initializer; if the call fails, orig_flags
 * may be used below without having been set. Confirm. */
426 (void) get_mount_flags(cleaned
, &orig_flags
);
427 orig_flags
&= ~MS_RDONLY
;
/* NOTE(review): this remount targets prefix, while the bind mount above
 * used the slash-normalized copy in cleaned. Confirm whether cleaned was
 * intended here as well. */
429 if (mount(NULL
, prefix
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0)
436 r
= set_consume(done
, x
);
441 while ((x
= set_steal_first(todo
))) {
443 r
= set_consume(done
, x
);
444 if (r
== -EEXIST
|| r
== 0)
449 /* Try to reuse the original flag set, but
450 * don't care for errors, in case of
451 * obstructed mounts */
453 (void) get_mount_flags(x
, &orig_flags
);
454 orig_flags
&= ~MS_RDONLY
;
456 if (mount(NULL
, x
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0) {
458 /* Deal with mount points that are
459 * obstructed by a later mount */
/* Moves the mount at path onto "/" with an MS_MOVE mount() call. Any
 * other steps of this function and its return value are not visible in
 * this excerpt. */
469 int mount_move_root(const char *path
) {
475 if (mount(path
, "/", NULL
, MS_MOVE
, NULL
) < 0)
/* Reports whether fstype is contained in the static table, as decided by
 * nulstr_contains(); going by the function name the table presumably
 * lists network file system types, but its contents are not visible in
 * this excerpt.
 *
 * A "fuse." prefix is detected with startswith() beforehand; how that
 * result is used is likewise not visible here. */
487 bool fstype_is_network(const char *fstype
) {
488 static const char table
[] =
503 x
= startswith(fstype
, "fuse.");
507 return nulstr_contains(table
, fstype
);
510 int repeat_unmount(const char *path
, int flags
) {
515 /* If there are multiple mounts on a mount point, this
516 * removes them all */
519 if (umount2(path
, flags
) < 0) {