]>
Commit | Line | Data |
---|---|---|
/*
 *  linux/fs/pnode.c
 *
 * (C) Copyright IBM Corporation 2005.
 *	Released under GPL v2.
 *	Author : Ram Pai (linuxram@us.ibm.com)
 *
 */
9 | #include <linux/mnt_namespace.h> | |
10 | #include <linux/mount.h> | |
11 | #include <linux/fs.h> | |
12 | #include <linux/nsproxy.h> | |
13 | #include "internal.h" | |
14 | #include "pnode.h" | |
15 | ||
16 | /* return the next shared peer mount of @p */ | |
17 | static inline struct mount *next_peer(struct mount *p) | |
18 | { | |
19 | return list_entry(p->mnt_share.next, struct mount, mnt_share); | |
20 | } | |
21 | ||
22 | static inline struct mount *first_slave(struct mount *p) | |
23 | { | |
24 | return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave); | |
25 | } | |
26 | ||
27 | static inline struct mount *last_slave(struct mount *p) | |
28 | { | |
29 | return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave); | |
30 | } | |
31 | ||
32 | static inline struct mount *next_slave(struct mount *p) | |
33 | { | |
34 | return list_entry(p->mnt_slave.next, struct mount, mnt_slave); | |
35 | } | |
36 | ||
37 | static struct mount *get_peer_under_root(struct mount *mnt, | |
38 | struct mnt_namespace *ns, | |
39 | const struct path *root) | |
40 | { | |
41 | struct mount *m = mnt; | |
42 | ||
43 | do { | |
44 | /* Check the namespace first for optimization */ | |
45 | if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root)) | |
46 | return m; | |
47 | ||
48 | m = next_peer(m); | |
49 | } while (m != mnt); | |
50 | ||
51 | return NULL; | |
52 | } | |
53 | ||
54 | /* | |
55 | * Get ID of closest dominating peer group having a representative | |
56 | * under the given root. | |
57 | * | |
58 | * Caller must hold namespace_sem | |
59 | */ | |
60 | int get_dominating_id(struct mount *mnt, const struct path *root) | |
61 | { | |
62 | struct mount *m; | |
63 | ||
64 | for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { | |
65 | struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root); | |
66 | if (d) | |
67 | return d->mnt_group_id; | |
68 | } | |
69 | ||
70 | return 0; | |
71 | } | |
72 | ||
/*
 * Take @mnt out of its peer group and make it (together with the mounts
 * currently slaved to it) a slave of some master:
 *
 *  - if @mnt has peers, one of them becomes the master;
 *  - if @mnt has no peers, its existing ->mnt_master is kept as master;
 *  - if @mnt has neither peers nor a master, its slaves are simply cut
 *    loose (their ->mnt_master is set to NULL) and nothing else changes.
 *
 * Always returns 0.
 */
static int do_make_slave(struct mount *mnt)
{
	struct mount *master, *slave_mnt;

	if (list_empty(&mnt->mnt_share)) {
		/* @mnt is alone in its peer group: give the group ID back. */
		if (IS_MNT_SHARED(mnt)) {
			mnt_release_group_id(mnt);
			CLEAR_MNT_SHARED(mnt);
		}
		master = mnt->mnt_master;
		if (!master) {
			/* Nobody to inherit the slaves; detach them one by one. */
			struct list_head *p = &mnt->mnt_slave_list;
			while (!list_empty(p)) {
				slave_mnt = list_first_entry(p,
						struct mount, mnt_slave);
				list_del_init(&slave_mnt->mnt_slave);
				slave_mnt->mnt_master = NULL;
			}
			return 0;
		}
	} else {
		struct mount *m;
		/*
		 * slave 'mnt' to a peer mount that has the
		 * same root dentry. If none is available then
		 * slave it to anything that is available.
		 */
		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
				master = m;
				break;
			}
		}
		/*
		 * Leave the peer group.  The ID is only zeroed here (not
		 * released as in the branch above); the remaining peers
		 * are left untouched.
		 */
		list_del_init(&mnt->mnt_share);
		mnt->mnt_group_id = 0;
		CLEAR_MNT_SHARED(mnt);
	}
	/* Re-point every slave of @mnt at the chosen master ... */
	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
		slave_mnt->mnt_master = master;
	/* ... put @mnt itself on the master's slave list ... */
	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
	/* ... and splice @mnt's former slaves in after the current last entry. */
	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
	INIT_LIST_HEAD(&mnt->mnt_slave_list);
	mnt->mnt_master = master;
	return 0;
}
118 | ||
119 | /* | |
120 | * vfsmount lock must be held for write | |
121 | */ | |
122 | void change_mnt_propagation(struct mount *mnt, int type) | |
123 | { | |
124 | if (type == MS_SHARED) { | |
125 | set_mnt_shared(mnt); | |
126 | return; | |
127 | } | |
128 | do_make_slave(mnt); | |
129 | if (type != MS_SLAVE) { | |
130 | list_del_init(&mnt->mnt_slave); | |
131 | mnt->mnt_master = NULL; | |
132 | if (type == MS_UNBINDABLE) | |
133 | mnt->mnt.mnt_flags |= MNT_UNBINDABLE; | |
134 | else | |
135 | mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE; | |
136 | } | |
137 | } | |
138 | ||
/*
 * get the next mount in the propagation tree.
 * @m: the mount seen last
 * @origin: the original mount from where the tree walk initiated
 *
 * Returns the next mount to visit, or NULL when the walk would arrive
 * back at @origin.
 *
 * Note that peer groups form contiguous segments of slave lists.
 * We rely on that in get_source() to be able to find out if
 * vfsmount found while iterating with propagation_next() is
 * a peer of one we'd found earlier.
 * NOTE(review): get_source() is not defined in the code shown here;
 * the reference may be stale - confirm.
 */
static struct mount *propagation_next(struct mount *m,
					 struct mount *origin)
{
	/* are there any slaves of this mount? */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		return first_slave(m);

	while (1) {
		struct mount *master = m->mnt_master;

		if (master == origin->mnt_master) {
			/*
			 * @m has the same master as @origin: step to the
			 * next peer, finishing once that is @origin again.
			 */
			struct mount *next = next_peer(m);
			return (next == origin) ? NULL : next;
		} else if (m->mnt_slave.next != &master->mnt_slave_list)
			/* @m is not the last entry on its master's slave list */
			return next_slave(m);

		/* back at master */
		m = master;
	}
}
169 | ||
170 | static struct mount *skip_propagation_subtree(struct mount *m, | |
171 | struct mount *origin) | |
172 | { | |
173 | /* | |
174 | * Advance m such that propagation_next will not return | |
175 | * the slaves of m. | |
176 | */ | |
177 | if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) | |
178 | m = last_slave(m); | |
179 | ||
180 | return m; | |
181 | } | |
182 | ||
/*
 * Step from @m to a mount in the next group of the propagation walk
 * that started at @origin, or return NULL when the walk is back at
 * @origin.  propagate_mnt() calls this repeatedly and visits the whole
 * peer ring of each mount returned.
 * NOTE(review): appears to rely on peers being adjacent on slave lists
 * (see the comment on propagation_next()) - confirm.
 */
static struct mount *next_group(struct mount *m, struct mount *origin)
{
	while (1) {
		/* Walk along the peers of @m, descending into the first slave found. */
		while (1) {
			struct mount *next;
			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
				return first_slave(m);
			next = next_peer(m);
			if (m->mnt_group_id == origin->mnt_group_id) {
				/* in @origin's own group: stop after a full lap */
				if (next == origin)
					return NULL;
			} else if (m->mnt_slave.next != &next->mnt_slave)
				/* next peer does not directly follow @m on the slave list */
				break;
			m = next;
		}
		/* m is the last peer */
		while (1) {
			struct mount *master = m->mnt_master;
			/* another entry follows on the master's slave list? */
			if (m->mnt_slave.next != &master->mnt_slave_list)
				return next_slave(m);
			/* slave list exhausted: climb and continue from the master's next peer */
			m = next_peer(master);
			if (master->mnt_group_id == origin->mnt_group_id)
				break;
			if (master->mnt_slave.next == &m->mnt_slave)
				break;
			m = master;
		}
		if (m == origin)
			return NULL;
	}
}
214 | ||
/*
 * Working state shared between propagate_mnt() and propagate_one().
 * propagate_mnt() initialises all of it on entry.
 *
 * all accesses are serialized by namespace_sem
 */
/* user namespace of the caller's mount namespace */
static struct user_namespace *user_ns;
/*
 * last_dest:    most recent mount that received a copy (initially the
 *               destination mount)
 * first_source: the original source mount
 * last_source:  tree the next copy is cloned from (initially the source
 *               mount, afterwards the most recent copy)
 * dest_master:  ->mnt_master of the destination mount
 */
static struct mount *last_dest, *first_source, *last_source, *dest_master;
/* mountpoint the copies are attached at */
static struct mountpoint *mp;
/* hlist collecting the newly created copies */
static struct hlist_head *list;
220 | ||
221 | static inline bool peers(struct mount *m1, struct mount *m2) | |
222 | { | |
223 | return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; | |
224 | } | |
225 | ||
226 | static int propagate_one(struct mount *m) | |
227 | { | |
228 | struct mount *child; | |
229 | int type; | |
230 | /* skip ones added by this propagate_mnt() */ | |
231 | if (IS_MNT_NEW(m)) | |
232 | return 0; | |
233 | /* skip if mountpoint isn't covered by it */ | |
234 | if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) | |
235 | return 0; | |
236 | if (peers(m, last_dest)) { | |
237 | type = CL_MAKE_SHARED; | |
238 | } else { | |
239 | struct mount *n, *p; | |
240 | bool done; | |
241 | for (n = m; ; n = p) { | |
242 | p = n->mnt_master; | |
243 | if (p == dest_master || IS_MNT_MARKED(p)) | |
244 | break; | |
245 | } | |
246 | do { | |
247 | struct mount *parent = last_source->mnt_parent; | |
248 | if (last_source == first_source) | |
249 | break; | |
250 | done = parent->mnt_master == p; | |
251 | if (done && peers(n, parent)) | |
252 | break; | |
253 | last_source = last_source->mnt_master; | |
254 | } while (!done); | |
255 | ||
256 | type = CL_SLAVE; | |
257 | /* beginning of peer group among the slaves? */ | |
258 | if (IS_MNT_SHARED(m)) | |
259 | type |= CL_MAKE_SHARED; | |
260 | } | |
261 | ||
262 | /* Notice when we are propagating across user namespaces */ | |
263 | if (m->mnt_ns->user_ns != user_ns) | |
264 | type |= CL_UNPRIVILEGED; | |
265 | child = copy_tree(last_source, last_source->mnt.mnt_root, type); | |
266 | if (IS_ERR(child)) | |
267 | return PTR_ERR(child); | |
268 | child->mnt.mnt_flags &= ~MNT_LOCKED; | |
269 | mnt_set_mountpoint(m, mp, child); | |
270 | last_dest = m; | |
271 | last_source = child; | |
272 | if (m->mnt_master != dest_master) { | |
273 | read_seqlock_excl(&mount_lock); | |
274 | SET_MNT_MARK(m->mnt_master); | |
275 | read_sequnlock_excl(&mount_lock); | |
276 | } | |
277 | hlist_add_head(&child->mnt_hash, list); | |
278 | return count_mounts(m->mnt_ns, child); | |
279 | } | |
280 | ||
/*
 * mount 'source_mnt' under the destination 'dest_mnt' at
 * mountpoint 'dest_mp'. And propagate that mount to
 * all the peer and slave mounts of 'dest_mnt'.
 * Link all the new mounts into a propagation tree headed at
 * source_mnt. Also link all the new mounts using ->mnt_list
 * headed at source_mnt's ->mnt_list
 *
 * @dest_mnt: destination mount.
 * @dest_mp: destination mountpoint.
 * @source_mnt: source mount.
 * @tree_list : list of heads of trees to be attached.
 *
 * Returns 0 on success, otherwise the first non-zero value returned by
 * propagate_one().  Copies created before the failure stay on
 * @tree_list.
 */
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
		    struct mount *source_mnt, struct hlist_head *tree_list)
{
	struct mount *m, *n;
	int ret = 0;

	/*
	 * we don't want to bother passing tons of arguments to
	 * propagate_one(); everything is serialized by namespace_sem,
	 * so globals will do just fine.
	 */
	user_ns = current->nsproxy->mnt_ns->user_ns;
	last_dest = dest_mnt;
	first_source = source_mnt;
	last_source = source_mnt;
	mp = dest_mp;
	list = tree_list;
	dest_master = dest_mnt->mnt_master;

	/* all peers of dest_mnt, except dest_mnt itself */
	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
		ret = propagate_one(n);
		if (ret)
			goto out;
	}

	/* all slave groups */
	for (m = next_group(dest_mnt, dest_mnt); m;
			m = next_group(m, dest_mnt)) {
		/* everything in that slave group */
		n = m;
		do {
			ret = propagate_one(n);
			if (ret)
				goto out;
			n = next_peer(n);
		} while (n != m);
	}
out:
	/* Undo the marks propagate_one() left on masters, on success and on error. */
	read_seqlock_excl(&mount_lock);
	hlist_for_each_entry(n, tree_list, mnt_hash) {
		m = n->mnt_parent;
		if (m->mnt_master != dest_mnt->mnt_master)
			CLEAR_MNT_MARK(m->mnt_master);
	}
	read_sequnlock_excl(&mount_lock);
	return ret;
}
342 | ||
343 | static struct mount *find_topper(struct mount *mnt) | |
344 | { | |
345 | /* If there is exactly one mount covering mnt completely return it. */ | |
346 | struct mount *child; | |
347 | ||
348 | if (!list_is_singular(&mnt->mnt_mounts)) | |
349 | return NULL; | |
350 | ||
351 | child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child); | |
352 | if (child->mnt_mountpoint != mnt->mnt.mnt_root) | |
353 | return NULL; | |
354 | ||
355 | return child; | |
356 | } | |
357 | ||
/*
 * Return 1 when @mnt's reference count exceeds @count, 0 otherwise.
 */
static inline int do_refcount_check(struct mount *mnt, int count)
{
	if (mnt_get_count(mnt) > count)
		return 1;

	return 0;
}
365 | ||
366 | /* | |
367 | * check if the mount 'mnt' can be unmounted successfully. | |
368 | * @mnt: the mount to be checked for unmount | |
369 | * NOTE: unmounting 'mnt' would naturally propagate to all | |
370 | * other mounts its parent propagates to. | |
371 | * Check if any of these mounts that **do not have submounts** | |
372 | * have more references than 'refcnt'. If so return busy. | |
373 | * | |
374 | * vfsmount lock must be held for write | |
375 | */ | |
376 | int propagate_mount_busy(struct mount *mnt, int refcnt) | |
377 | { | |
378 | struct mount *m, *child, *topper; | |
379 | struct mount *parent = mnt->mnt_parent; | |
380 | ||
381 | if (mnt == parent) | |
382 | return do_refcount_check(mnt, refcnt); | |
383 | ||
384 | /* | |
385 | * quickly check if the current mount can be unmounted. | |
386 | * If not, we don't have to go checking for all other | |
387 | * mounts | |
388 | */ | |
389 | if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt)) | |
390 | return 1; | |
391 | ||
392 | for (m = propagation_next(parent, parent); m; | |
393 | m = propagation_next(m, parent)) { | |
394 | int count = 1; | |
395 | child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); | |
396 | if (!child) | |
397 | continue; | |
398 | ||
399 | /* Is there exactly one mount on the child that covers | |
400 | * it completely whose reference should be ignored? | |
401 | */ | |
402 | topper = find_topper(child); | |
403 | if (topper) | |
404 | count += 1; | |
405 | else if (!list_empty(&child->mnt_mounts)) | |
406 | continue; | |
407 | ||
408 | if (do_refcount_check(child, count)) | |
409 | return 1; | |
410 | } | |
411 | return 0; | |
412 | } | |
413 | ||
414 | /* | |
415 | * Clear MNT_LOCKED when it can be shown to be safe. | |
416 | * | |
417 | * mount_lock lock must be held for write | |
418 | */ | |
419 | void propagate_mount_unlock(struct mount *mnt) | |
420 | { | |
421 | struct mount *parent = mnt->mnt_parent; | |
422 | struct mount *m, *child; | |
423 | ||
424 | BUG_ON(parent == mnt); | |
425 | ||
426 | for (m = propagation_next(parent, parent); m; | |
427 | m = propagation_next(m, parent)) { | |
428 | child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint); | |
429 | if (child) | |
430 | child->mnt.mnt_flags &= ~MNT_LOCKED; | |
431 | } | |
432 | } | |
433 | ||
/*
 * Commit @mnt to being unmounted: drop its mark, flag it MNT_UMOUNT,
 * unlink it from its parent's child list and from whichever
 * ->mnt_umounting list it is on, and move it to the tail of @to_umount.
 */
static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
	CLEAR_MNT_MARK(mnt);
	mnt->mnt.mnt_flags |= MNT_UMOUNT;
	list_del_init(&mnt->mnt_child);
	list_del_init(&mnt->mnt_umounting);
	list_move_tail(&mnt->mnt_list, to_umount);
}
442 | ||
/*
 * Decide what to do with one umount candidate.
 * @mnt: the candidate
 * @to_umount: list receiving mounts that will be unmounted
 * @to_restore: list receiving mounts that were visited but must stay
 *
 * Returns true when @mnt was newly marked (whether or not it was also
 * unmounted), false when it was already unmounted/marked or still has a
 * mounted child.
 *
 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
 * parent propagates to.
 */
static bool __propagate_umount(struct mount *mnt,
			       struct list_head *to_umount,
			       struct list_head *to_restore)
{
	bool progress = false;
	struct mount *child;

	/*
	 * The state of the parent won't change if this mount is
	 * already unmounted or marked as without children.
	 */
	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
		goto out;

	/* Verify topper is the only grandchild that has not been
	 * speculatively unmounted.
	 */
	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
		/* a child mounted on @mnt's root is a topper: ignore it */
		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
			continue;
		/* already visited and marked: counts as gone */
		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
			continue;
		/* Found a mounted child */
		goto children;
	}

	/* Mark mounts that can be unmounted if not locked */
	SET_MNT_MARK(mnt);
	progress = true;

	/* If a mount is without children and not locked umount it. */
	if (!IS_MNT_LOCKED(mnt)) {
		umount_one(mnt, to_umount);
	} else {
		/* also reached by goto when a mounted child was found above */
children:
		list_move_tail(&mnt->mnt_umounting, to_restore);
	}
out:
	return progress;
}
487 | ||
/*
 * For every mount on @to_umount, deal with the children still attached
 * to it: a child mounted on the mount's root (a topper) is queued on
 * @to_restore, any other child is unmounted as well.
 */
static void umount_list(struct list_head *to_umount,
			struct list_head *to_restore)
{
	struct mount *mnt, *child, *tmp;
	/*
	 * umount_one() appends to @to_umount, so children added here are
	 * themselves reached later in this same outer walk.
	 */
	list_for_each_entry(mnt, to_umount, mnt_list) {
		/* _safe: umount_one() unlinks the child from ->mnt_mounts */
		list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
			/* topper? */
			if (child->mnt_mountpoint == mnt->mnt.mnt_root)
				list_move_tail(&child->mnt_umounting, to_restore);
			else
				umount_one(child, to_umount);
		}
	}
}
502 | ||
503 | static void restore_mounts(struct list_head *to_restore) | |
504 | { | |
505 | /* Restore mounts to a clean working state */ | |
506 | while (!list_empty(to_restore)) { | |
507 | struct mount *mnt, *parent; | |
508 | struct mountpoint *mp; | |
509 | ||
510 | mnt = list_first_entry(to_restore, struct mount, mnt_umounting); | |
511 | CLEAR_MNT_MARK(mnt); | |
512 | list_del_init(&mnt->mnt_umounting); | |
513 | ||
514 | /* Should this mount be reparented? */ | |
515 | mp = mnt->mnt_mp; | |
516 | parent = mnt->mnt_parent; | |
517 | while (parent->mnt.mnt_flags & MNT_UMOUNT) { | |
518 | mp = parent->mnt_mp; | |
519 | parent = parent->mnt_parent; | |
520 | } | |
521 | if (parent != mnt->mnt_parent) | |
522 | mnt_change_mountpoint(parent, mp, mnt); | |
523 | } | |
524 | } | |
525 | ||
526 | static void cleanup_umount_visitations(struct list_head *visited) | |
527 | { | |
528 | while (!list_empty(visited)) { | |
529 | struct mount *mnt = | |
530 | list_first_entry(visited, struct mount, mnt_umounting); | |
531 | list_del_init(&mnt->mnt_umounting); | |
532 | } | |
533 | } | |
534 | ||
/*
 * collect all mounts that receive propagation from the mounts in @list,
 * and return these additional mounts in the same list.
 * @list: the list of mounts to be unmounted.
 *
 * Always returns 0.
 *
 * vfsmount lock must be held for write
 */
int propagate_umount(struct list_head *list)
{
	struct mount *mnt;
	LIST_HEAD(to_restore);
	LIST_HEAD(to_umount);
	LIST_HEAD(visited);

	/* Find candidates for unmounting */
	list_for_each_entry_reverse(mnt, list, mnt_list) {
		struct mount *parent = mnt->mnt_parent;
		struct mount *m;

		/*
		 * If this mount has already been visited it is known that its
		 * entire peer group and all of their slaves in the propagation
		 * tree for the mountpoint have already been visited and there
		 * is no need to visit them again.
		 */
		if (!list_empty(&mnt->mnt_umounting))
			continue;

		list_add_tail(&mnt->mnt_umounting, &visited);
		for (m = propagation_next(parent, parent); m;
		     m = propagation_next(m, parent)) {
			struct mount *child = __lookup_mnt(&m->mnt,
							   mnt->mnt_mountpoint);
			if (!child)
				continue;

			if (!list_empty(&child->mnt_umounting)) {
				/*
				 * If the child has already been visited it is
				 * known that its entire peer group and all of
				 * their slaves in the propagation tree for the
				 * mountpoint have already been visited and
				 * there is no need to visit this subtree again.
				 */
				m = skip_propagation_subtree(m, parent);
				continue;
			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
				/*
				 * We have come across a partially unmounted
				 * mount in list that has not been visited yet.
				 * Remember it has been visited and continue
				 * about our merry way.
				 */
				list_add_tail(&child->mnt_umounting, &visited);
				continue;
			}

			/* Check the child and parents while progress is made */
			while (__propagate_umount(child,
						  &to_umount, &to_restore)) {
				/* Is the parent a umount candidate? */
				child = child->mnt_parent;
				if (list_empty(&child->mnt_umounting))
					break;
			}
		}
	}

	/* Unmount leftover children, repair survivors, then hand the result back. */
	umount_list(&to_umount, &to_restore);
	restore_mounts(&to_restore);
	cleanup_umount_visitations(&visited);
	list_splice_tail(&to_umount, list);

	return 0;
}