2 This file is part of systemd.
4 Copyright 2010 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
23 #include <sys/mount.h>
25 #include <sys/statvfs.h>
28 #include "alloc-util.h"
33 #include "mount-util.h"
34 #include "parse-util.h"
35 #include "path-util.h"
37 #include "stdio-util.h"
38 #include "string-util.h"
/* Looks up the mount ID of a file by parsing the "mnt_id:" field of a
 * /proc/self/fdinfo/ entry.
 *
 * fd, filename and flags select the file: when flags contains AT_EMPTY_PATH
 * and filename is empty, the entry of fd itself is read; the other visible
 * path opens filename relative to fd with O_PATH and reads the entry of that
 * new descriptor (subfd, closed automatically via _cleanup_close_).
 *
 * The parsed number is stored in *mnt_id by safe_atoi(), whose result is the
 * return value on the success path. The comments below indicate that a missing
 * fdinfo file or a missing mnt_id field are treated as "not supported" cases;
 * the exact error codes are on lines not visible in this excerpt. */
40 static int fd_fdinfo_mnt_id(int fd
, const char *filename
, int flags
, int *mnt_id
) {
41 char path
[strlen("/proc/self/fdinfo/") + DECIMAL_STR_MAX(int)];
42 _cleanup_free_
char *fdinfo
= NULL
;
43 _cleanup_close_
int subfd
= -1;
/* An empty filename together with AT_EMPTY_PATH means fd itself is the object of interest. */
47 if ((flags
& AT_EMPTY_PATH
) && isempty(filename
))
48 xsprintf(path
, "/proc/self/fdinfo/%i", fd
);
/* Open filename relative to fd with O_PATH so that its fdinfo entry can be consulted. */
50 subfd
= openat(fd
, filename
, O_RDONLY
|O_CLOEXEC
|O_NOCTTY
|O_PATH
);
54 xsprintf(path
, "/proc/self/fdinfo/%i", subfd
);
57 r
= read_full_file(path
, &fdinfo
, NULL
);
58 if (r
== -ENOENT
) /* The fdinfo directory is a relatively new addition */
/* The field is searched for both at the very start of the file and after a newline further down. */
63 p
= startswith(fdinfo
, "mnt_id:");
65 p
= strstr(fdinfo
, "\nmnt_id:");
66 if (!p
) /* The mnt_id field is a relatively new addition */
/* Skip leading whitespace and terminate the value at the next whitespace before parsing it. */
72 p
+= strspn(p
, WHITESPACE
);
73 p
[strcspn(p
, WHITESPACE
)] = 0;
75 return safe_atoi(p
, mnt_id
);
/* Decides whether filename, looked up relative to the directory fd, is a
 * mount point.
 *
 * The visible code compares information about the object (fd + filename +
 * flags) with the same information about fd itself (queried with "" and
 * AT_EMPTY_PATH), using three sources in turn: name_to_handle_at(),
 * fd_fdinfo_mnt_id(), and fstatat(). The visible return statements yield the
 * result of "mount_id != mount_id_parent" or, in the fstatat() stage,
 * "check_st_dev && a.st_dev != b.st_dev"; other return paths are on lines not
 * shown in this excerpt. */
79 int fd_is_mount_point(int fd
, const char *filename
, int flags
) {
80 union file_handle_union h
= FILE_HANDLE_INIT
, h_parent
= FILE_HANDLE_INIT
;
81 int mount_id
= -1, mount_id_parent
= -1;
82 bool nosupp
= false, check_st_dev
= true;
89 /* First we will try the name_to_handle_at() syscall, which
90 * tells us the mount id and an opaque file "handle". It is
91 * not supported everywhere though (kernel compile-time
92 * option, not all file systems are hooked up). If it works
93 * the mount id is usually good enough to tell us whether
94 * something is a mount point.
96 * If that didn't work we will try to read the mount id from
97 * /proc/self/fdinfo/<fd>. This is almost as good as
98 * name_to_handle_at(), however, it does not return the
99 * opaque file handle. The opaque file handle is pretty useful
100 * to detect the root directory, which we should always
101 * consider a mount point. Hence we use this only as
102 * fallback. Exporting the mnt_id in fdinfo is a pretty recent addition.
105 * As last fallback we do traditional fstat() based st_dev
106 * comparisons. This is how things were traditionally done,
107 * but unionfs breaks this since it exposes file
108 * systems with a variety of st_dev reported. Also, btrfs
109 * subvolumes have different st_dev, even though they aren't
110 * real mounts of their own. */
/* First source: handle and mount ID of the object itself. */
112 r
= name_to_handle_at(fd
, filename
, &h
.handle
, &mount_id
, flags
);
115 /* This kernel does not support name_to_handle_at()
116 * fall back to simpler logic. */
117 goto fallback_fdinfo
;
118 else if (errno
== EOPNOTSUPP
)
119 /* This kernel or file system does not support
120 * name_to_handle_at(), hence let's see if the
121 * upper fs supports it (in which case it is a
122 * mount point), otherwise fallback to the
123 * traditional stat() logic */
/* Same query for the directory fd itself, which serves as the parent to compare against. */
129 r
= name_to_handle_at(fd
, "", &h_parent
.handle
, &mount_id_parent
, AT_EMPTY_PATH
);
131 if (errno
== EOPNOTSUPP
) {
133 /* Neither parent nor child do name_to_handle_at()?
134 We have no choice but to fall back. */
135 goto fallback_fdinfo
;
137 /* The parent can't do name_to_handle_at() but the
138 * directory we are interested in can?
139 * If so, it must be a mount point. */
145 /* The parent can do name_to_handle_at() but the
146 * directory we are interested in can't? If so, it
147 * must be a mount point. */
151 /* If the file handle for the directory we are
152 * interested in and its parent are identical, we
153 * assume this is the root directory, which is a mount
156 if (h
.handle
.handle_bytes
== h_parent
.handle
.handle_bytes
&&
157 h
.handle
.handle_type
== h_parent
.handle
.handle_type
&&
158 memcmp(h
.handle
.f_handle
, h_parent
.handle
.f_handle
, h
.handle
.handle_bytes
) == 0)
161 return mount_id
!= mount_id_parent
;
164 r
= fd_fdinfo_mnt_id(fd
, filename
, flags
, &mount_id
);
165 if (r
== -EOPNOTSUPP
)
170 r
= fd_fdinfo_mnt_id(fd
, "", AT_EMPTY_PATH
, &mount_id_parent
);
174 if (mount_id
!= mount_id_parent
)
177 /* Hmm, so, the mount ids are the same. This leaves one
178 * special case though for the root file system. For that,
179 * let's see if the parent directory has the same inode as we
180 * are interested in. Hence, let's also do fstat() checks now,
181 * too, but avoid the st_dev comparisons, since they aren't
182 * that useful on unionfs mounts. */
183 check_st_dev
= false;
186 /* yay for fstatat() taking a different set of flags than the other
188 if (flags
& AT_SYMLINK_FOLLOW
)
189 flags
&= ~AT_SYMLINK_FOLLOW
;
191 flags
|= AT_SYMLINK_NOFOLLOW
;
192 if (fstatat(fd
, filename
, &a
, flags
) < 0)
195 if (fstatat(fd
, "", &b
, AT_EMPTY_PATH
) < 0)
198 /* A directory with same device and inode as its parent? Must
199 * be the root directory */
200 if (a
.st_dev
== b
.st_dev
&&
201 a
.st_ino
== b
.st_ino
)
/* A differing st_dev only counts as evidence of a mount point while check_st_dev is still set. */
204 return check_st_dev
&& (a
.st_dev
!= b
.st_dev
);
/* Path-based front end to fd_is_mount_point(): opens the parent directory of
 * t as an O_PATH|O_DIRECTORY descriptor and returns what fd_is_mount_point()
 * reports for the last component of t within it.
 *
 * "/" is special-cased through path_equal() before anything is opened (the
 * value returned for it is on a line not visible in this excerpt). When
 * AT_SYMLINK_FOLLOW is set the path is passed through
 * canonicalize_file_name() first. */
207 /* flags can be AT_SYMLINK_FOLLOW or 0 */
208 int path_is_mount_point(const char *t
, int flags
) {
209 _cleanup_close_
int fd
= -1;
210 _cleanup_free_
char *canonical
= NULL
, *parent
= NULL
;
214 if (path_equal(t
, "/"))
217 /* we need to resolve symlinks manually, we can't just rely on
218 * fd_is_mount_point() to do that for us; if we have a structure like
219 * /bin -> /usr/bin/ and /usr is a mount point, then the parent that we
220 * look at needs to be /usr, not /. */
221 if (flags
& AT_SYMLINK_FOLLOW
) {
222 canonical
= canonicalize_file_name(t
);
/* Split the path: the directory part is opened, the last component is handed on as filename. */
229 parent
= dirname_malloc(t
);
233 fd
= openat(AT_FDCWD
, parent
, O_RDONLY
|O_NONBLOCK
|O_DIRECTORY
|O_CLOEXEC
|O_PATH
);
237 return fd_is_mount_point(fd
, basename(t
), flags
);
/* Unmounts the mounts listed in /proc/self/mountinfo whose mount point lies
 * under prefix, passing flags through to umount2().
 *
 * Each mountinfo record is scanned with fscanf(); only field (5), the mount
 * point, is captured, all other fields are skipped. The loop structure, the
 * handling of umount2() failures and the return value are on lines not
 * visible in this excerpt. */
240 int umount_recursive(const char *prefix
, int flags
) {
244 /* Try to umount everything recursively below a
245 * directory. Also, take care of stacked mounts, and keep
246 * unmounting them until they are gone. */
249 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
254 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
255 if (!proc_self_mountinfo
)
259 _cleanup_free_
char *path
= NULL
, *p
= NULL
;
262 k
= fscanf(proc_self_mountinfo
,
263 "%*s " /* (1) mount id */
264 "%*s " /* (2) parent id */
265 "%*s " /* (3) major:minor */
266 "%*s " /* (4) root */
267 "%ms " /* (5) mount point */
268 "%*s" /* (6) mount options */
269 "%*[^-]" /* (7) optional fields */
270 "- " /* (8) separator */
271 "%*s " /* (9) file system type */
272 "%*s" /* (10) mount source */
273 "%*s" /* (11) mount options 2 */
274 "%*[^\n]", /* some rubbish at the end */
/* Undo the escaping of the scanned mount point before it is compared and unmounted. */
283 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
/* Only mount points located under prefix are of interest. */
287 if (!path_startswith(p
, prefix
))
290 if (umount2(p
, flags
) < 0) {
/* Helper that queries statvfs() for path. Judging by its name and the flags
 * out-parameter it presumably reports the mount flags of the file system
 * containing path - TODO confirm; the lines that fill in *flags and produce
 * the return value are not visible in this excerpt. */
306 static int get_mount_flags(const char *path
, unsigned long *flags
) {
309 if (statvfs(path
, &buf
) < 0)
/* Bind-remounts prefix and the mounts below it read-only (ro == true) or
 * read-write (ro == false).
 *
 * Two string sets drive the work: "done" holds the mount points that were
 * already handled, "todo" is refilled from /proc/self/mountinfo with mount
 * points under the cleaned prefix that are not yet in "done". Each entry is
 * remounted with MS_BIND|MS_REMOUNT plus the flags obtained from
 * get_mount_flags(), with MS_RDONLY cleared and then re-added only if ro is
 * set. Loop structure, error returns and the final return value are on lines
 * not visible in this excerpt. */
315 int bind_remount_recursive(const char *prefix
, bool ro
) {
316 _cleanup_set_free_free_ Set
*done
= NULL
;
317 _cleanup_free_
char *cleaned
= NULL
;
320 /* Recursively remount a directory (and all its submounts)
321 * read-only or read-write. If the directory is already
322 * mounted, we reuse the mount and simply mark it
323 * MS_BIND|MS_RDONLY (or remove the MS_RDONLY for read-write
324 * operation). If it isn't we first make it one. Afterwards we
325 * apply MS_BIND|MS_RDONLY (or remove MS_RDONLY) to all
326 * submounts we can access, too. When mounts are stacked on
327 * the same mount point we only care for each individual
328 * "top-level" mount on each point, as we cannot
329 * influence/access the underlying mounts anyway. We do not
330 * have any effect on future submounts that might get
331 * propagated, they might be writable. This includes future
332 * submounts that have been triggered via autofs. */
/* Work on a private copy of prefix that path_kill_slashes() can modify. */
334 cleaned
= strdup(prefix
);
338 path_kill_slashes(cleaned
);
340 done
= set_new(&string_hash_ops
);
345 _cleanup_fclose_
FILE *proc_self_mountinfo
= NULL
;
346 _cleanup_set_free_free_ Set
*todo
= NULL
;
347 bool top_autofs
= false;
/* NOTE(review): orig_flags has no initializer and the get_mount_flags() results
 * below are discarded with (void). If that helper can fail without writing its
 * out-parameter, the "&= ~MS_RDONLY" that follows reads an indeterminate value -
 * confirm, and consider initializing to 0. */
349 unsigned long orig_flags
;
351 todo
= set_new(&string_hash_ops
);
355 proc_self_mountinfo
= fopen("/proc/self/mountinfo", "re");
356 if (!proc_self_mountinfo
)
360 _cleanup_free_
char *path
= NULL
, *p
= NULL
, *type
= NULL
;
/* Unlike in umount_recursive(), field (9), the file system type, is captured as well. */
363 k
= fscanf(proc_self_mountinfo
,
364 "%*s " /* (1) mount id */
365 "%*s " /* (2) parent id */
366 "%*s " /* (3) major:minor */
367 "%*s " /* (4) root */
368 "%ms " /* (5) mount point */
369 "%*s" /* (6) mount options (superblock) */
370 "%*[^-]" /* (7) optional fields */
371 "- " /* (8) separator */
372 "%ms " /* (9) file system type */
373 "%*s" /* (10) mount source */
374 "%*s" /* (11) mount options (bind mount) */
375 "%*[^\n]", /* some rubbish at the end */
385 r
= cunescape(path
, UNESCAPE_RELAX
, &p
);
389 /* Let's ignore autofs mounts. If they aren't
390 * triggered yet, we want to avoid triggering
391 * them, as we don't make any guarantees for
392 * future submounts anyway. If they are
393 * already triggered, then we will find
394 * another entry for this. */
395 if (streq(type
, "autofs")) {
396 top_autofs
= top_autofs
|| path_equal(cleaned
, p
);
/* Queue every mount point under the cleaned prefix that has not been handled yet. */
400 if (path_startswith(p
, cleaned
) &&
401 !set_contains(done
, p
)) {
403 r
= set_consume(todo
, p
);
413 /* If we have no submounts to process anymore and if
414 * the root is either already done, or an autofs, we
416 if (set_isempty(todo
) &&
417 (top_autofs
|| set_contains(done
, cleaned
)))
420 if (!set_contains(done
, cleaned
) &&
421 !set_contains(todo
, cleaned
)) {
422 /* The prefix directory itself is not yet a
423 * mount, make it one. */
424 if (mount(cleaned
, cleaned
, NULL
, MS_BIND
|MS_REC
, NULL
) < 0)
428 (void) get_mount_flags(cleaned
, &orig_flags
);
429 orig_flags
&= ~MS_RDONLY
;
/* NOTE(review): this remount passes the caller's raw prefix, while the bind
 * mount and the get_mount_flags() call just above use cleaned - confirm that
 * the difference is intended. */
431 if (mount(NULL
, prefix
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0)
438 r
= set_consume(done
, x
);
/* Drain todo: each stolen path is handed over to done and then bind-remounted. */
443 while ((x
= set_steal_first(todo
))) {
445 r
= set_consume(done
, x
);
446 if (r
== -EEXIST
|| r
== 0)
451 /* Try to reuse the original flag set, but
452 * don't care for errors, in case of
453 * obstructed mounts */
455 (void) get_mount_flags(x
, &orig_flags
);
456 orig_flags
&= ~MS_RDONLY
;
458 if (mount(NULL
, x
, NULL
, orig_flags
|MS_BIND
|MS_REMOUNT
|(ro
? MS_RDONLY
: 0), NULL
) < 0) {
460 /* Deal with mount points that are
461 * obstructed by a later mount */
/* Moves the mount at path onto "/" with mount(..., MS_MOVE, ...). Any steps
 * before or after that call, and the return value, are on lines not visible
 * in this excerpt. */
471 int mount_move_root(const char *path
) {
477 if (mount(path
, "/", NULL
, MS_MOVE
, NULL
) < 0)
/* Reports whether fstype is listed in a static table, looked up with
 * nulstr_contains(); going by the function name the table presumably lists
 * network file system types (its contents are not visible in this excerpt).
 * A "fuse." prefix is detected with startswith() beforehand; what is done
 * with the remainder x is likewise not visible here. */
489 bool fstype_is_network(const char *fstype
) {
490 static const char table
[] =
505 x
= startswith(fstype
, "fuse.");
509 return nulstr_contains(table
, fstype
);
512 int repeat_unmount(const char *path
, int flags
) {
517 /* If there are multiple mounts on a mount point, this
518 * removes them all */
521 if (umount2(path
, flags
) < 0) {