]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - fs/namespace.c
vhost: scsi: add weight support
[mirror_ubuntu-bionic-kernel.git] / fs / namespace.c
index 9d1374ab6e06f2cd7b57aedf196c53a745a1a683..c34527426ba97551a9844ea31f135c1a6ec2de34 100644 (file)
@@ -517,6 +517,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
        mnt_dec_writers(real_mount(mnt));
        preempt_enable();
 }
+EXPORT_SYMBOL_GPL(__mnt_drop_write);
 
 /**
  * mnt_drop_write - give up write access to a mount
@@ -659,12 +660,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
                return 0;
        mnt = real_mount(bastard);
        mnt_add_count(mnt, 1);
+       smp_mb();                       // see mntput_no_expire()
        if (likely(!read_seqretry(&mount_lock, seq)))
                return 0;
        if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
                mnt_add_count(mnt, -1);
                return 1;
        }
+       lock_mount_hash();
+       if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+               mnt_add_count(mnt, -1);
+               unlock_mount_hash();
+               return 1;
+       }
+       unlock_mount_hash();
+       /* caller will mntput() */
        return -1;
 }
 
@@ -771,9 +781,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 
        hlist_for_each_entry(mp, chain, m_hash) {
                if (mp->m_dentry == dentry) {
-                       /* might be worth a WARN_ON() */
-                       if (d_unlinked(dentry))
-                               return ERR_PTR(-ENOENT);
                        mp->m_count++;
                        return mp;
                }
@@ -787,6 +794,9 @@ static struct mountpoint *get_mountpoint(struct dentry *dentry)
        int ret;
 
        if (d_mountpoint(dentry)) {
+               /* might be worth a WARN_ON() */
+               if (d_unlinked(dentry))
+                       return ERR_PTR(-ENOENT);
 mountpoint:
                read_seqlock_excl(&mount_lock);
                mp = lookup_mountpoint(dentry);
@@ -846,6 +856,13 @@ static inline int check_mnt(struct mount *mnt)
        return mnt->mnt_ns == current->nsproxy->mnt_ns;
 }
 
+/* for aufs, CONFIG_AUFS_BR_FUSE */
+int is_current_mnt_ns(struct vfsmount *mnt)
+{
+       return check_mnt(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(is_current_mnt_ns);
+
 /*
  * vfsmount lock must be held for write
  */
@@ -1089,7 +1106,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                        goto out_free;
        }
 
-       mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
+       mnt->mnt.mnt_flags = old->mnt.mnt_flags;
+       mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
        /* Don't allow unprivileged users to change mount flags */
        if (flag & CL_UNPRIVILEGED) {
                mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
@@ -1194,12 +1212,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
 static void mntput_no_expire(struct mount *mnt)
 {
        rcu_read_lock();
-       mnt_add_count(mnt, -1);
-       if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+       if (likely(READ_ONCE(mnt->mnt_ns))) {
+               /*
+                * Since we don't do lock_mount_hash() here,
+                * ->mnt_ns can change under us.  However, if it's
+                * non-NULL, then there's a reference that won't
+                * be dropped until after an RCU delay done after
+                * turning ->mnt_ns NULL.  So if we observe it
+                * non-NULL under rcu_read_lock(), the reference
+                * we are dropping is not the final one.
+                */
+               mnt_add_count(mnt, -1);
                rcu_read_unlock();
                return;
        }
        lock_mount_hash();
+       /*
+        * make sure that if __legitimize_mnt() has not seen us grab
+        * mount_lock, we'll see their refcount increment here.
+        */
+       smp_mb();
+       mnt_add_count(mnt, -1);
        if (mnt_get_count(mnt)) {
                rcu_read_unlock();
                unlock_mount_hash();
@@ -1589,7 +1622,7 @@ static int do_umount(struct mount *mnt, int flags)
                 * Special case for "unmounting" root ...
                 * we just try to remount it readonly.
                 */
-               if (!capable(CAP_SYS_ADMIN))
+               if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
                        return -EPERM;
                down_write(&sb->s_umount);
                if (!sb_rdonly(sb))
@@ -1600,8 +1633,13 @@ static int do_umount(struct mount *mnt, int flags)
 
        namespace_lock();
        lock_mount_hash();
-       event++;
 
+       /* Recheck MNT_LOCKED with the locks held */
+       retval = -EINVAL;
+       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+               goto out;
+
+       event++;
        if (flags & MNT_DETACH) {
                if (!list_empty(&mnt->mnt_list))
                        umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1615,6 +1653,7 @@ static int do_umount(struct mount *mnt, int flags)
                        retval = 0;
                }
        }
+out:
        unlock_mount_hash();
        namespace_unlock();
        return retval;
@@ -1705,7 +1744,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
                goto dput_and_out;
        if (!check_mnt(mnt))
                goto dput_and_out;
-       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+       if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
                goto dput_and_out;
        retval = -EPERM;
        if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
@@ -1783,8 +1822,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                for (s = r; s; s = next_mnt(s, r)) {
                        if (!(flag & CL_COPY_UNBINDABLE) &&
                            IS_MNT_UNBINDABLE(s)) {
-                               s = skip_mnt_tree(s);
-                               continue;
+                               if (s->mnt.mnt_flags & MNT_LOCKED) {
+                                       /* Both unbindable and locked. */
+                                       q = ERR_PTR(-EPERM);
+                                       goto out;
+                               } else {
+                                       s = skip_mnt_tree(s);
+                                       continue;
+                               }
                        }
                        if (!(flag & CL_COPY_MNT_NS_FILE) &&
                            is_mnt_ns_file(s->mnt.mnt_root)) {
@@ -1837,7 +1882,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
 {
        namespace_lock();
        lock_mount_hash();
-       umount_tree(real_mount(mnt), UMOUNT_SYNC);
+       umount_tree(real_mount(mnt), 0);
        unlock_mount_hash();
        namespace_unlock();
 }
@@ -1881,6 +1926,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(iterate_mounts);
 
 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
 {
@@ -2327,7 +2373,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
        down_write(&sb->s_umount);
        if (ms_flags & MS_BIND)
                err = change_mount_flags(path->mnt, ms_flags);
-       else if (!capable(CAP_SYS_ADMIN))
+       else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
                err = -EPERM;
        else
                err = do_remount_sb(sb, sb_flags, data, 0);
@@ -2809,7 +2855,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
                mnt_flags |= MNT_NODIRATIME;
        if (flags & MS_STRICTATIME)
                mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
-       if (flags & SB_RDONLY)
+       if (flags & MS_RDONLY)
                mnt_flags |= MNT_READONLY;
 
        /* The default atime for remount is preservation */