vhost: scsi: add weight support

[mirror_ubuntu-bionic-kernel.git] / fs / namespace.c
diff --git a/fs/namespace.c b/fs/namespace.c

index 9d1374ab6e06f2cd7b57aedf196c53a745a1a683..c34527426ba97551a9844ea31f135c1a6ec2de34 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -517,6 +517,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
         mnt_dec_writers(real_mount(mnt));
         preempt_enable();
  }
+EXPORT_SYMBOL_GPL(__mnt_drop_write);
  
  /**
   * mnt_drop_write - give up write access to a mount
@@ -659,12 +660,21 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
                 return 0;
         mnt = real_mount(bastard);
         mnt_add_count(mnt, 1);
+       smp_mb();                       // see mntput_no_expire()
         if (likely(!read_seqretry(&mount_lock, seq)))
                 return 0;
         if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
                 mnt_add_count(mnt, -1);
                 return 1;
         }
+       lock_mount_hash();
+       if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
+               mnt_add_count(mnt, -1);
+               unlock_mount_hash();
+               return 1;
+       }
+       unlock_mount_hash();
+       /* caller will mntput() */
         return -1;
  }
  
@@ -771,9 +781,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
  
         hlist_for_each_entry(mp, chain, m_hash) {
                 if (mp->m_dentry == dentry) {
-                       /* might be worth a WARN_ON() */
-                       if (d_unlinked(dentry))
-                               return ERR_PTR(-ENOENT);
                         mp->m_count++;
                         return mp;
                 }
@@ -787,6 +794,9 @@ static struct mountpoint *get_mountpoint(struct dentry *dentry)
         int ret;
  
         if (d_mountpoint(dentry)) {
+               /* might be worth a WARN_ON() */
+               if (d_unlinked(dentry))
+                       return ERR_PTR(-ENOENT);
  mountpoint:
                 read_seqlock_excl(&mount_lock);
                 mp = lookup_mountpoint(dentry);
@@ -846,6 +856,13 @@ static inline int check_mnt(struct mount *mnt)
         return mnt->mnt_ns == current->nsproxy->mnt_ns;
  }
  
+/* for aufs, CONFIG_AUFS_BR_FUSE: is mnt in current's mount namespace? */
+int is_current_mnt_ns(struct vfsmount *mnt)
+{
+       return check_mnt(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(is_current_mnt_ns);
+
  /*
   * vfsmount lock must be held for write
   */
@@ -1089,7 +1106,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
                         goto out_free;
         }
  
-       mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
+       mnt->mnt.mnt_flags = old->mnt.mnt_flags;
+       mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
         /* Don't allow unprivileged users to change mount flags */
         if (flag & CL_UNPRIVILEGED) {
                 mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
@@ -1194,12 +1212,27 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
  static void mntput_no_expire(struct mount *mnt)
  {
         rcu_read_lock();
-       mnt_add_count(mnt, -1);
-       if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+       if (likely(READ_ONCE(mnt->mnt_ns))) {
+               /*
+                * Since we don't do lock_mount_hash() here,
+                * ->mnt_ns can change under us.  However, if it's
+                * non-NULL, then there's a reference that won't
+                * be dropped until after an RCU delay done after
+                * turning ->mnt_ns NULL.  So if we observe it
+                * non-NULL under rcu_read_lock(), the reference
+                * we are dropping is not the final one.
+                */
+               mnt_add_count(mnt, -1);
                 rcu_read_unlock();
                 return;
         }
         lock_mount_hash();
+       /*
+        * make sure that if __legitimize_mnt() has not seen us grab
+        * mount_lock, we'll see their refcount increment here.
+        */
+       smp_mb();
+       mnt_add_count(mnt, -1);
         if (mnt_get_count(mnt)) {
                 rcu_read_unlock();
                 unlock_mount_hash();
@@ -1589,7 +1622,7 @@ static int do_umount(struct mount *mnt, int flags)
                  * Special case for "unmounting" root ...
                  * we just try to remount it readonly.
                  */
-               if (!capable(CAP_SYS_ADMIN))
+               if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
                         return -EPERM;
                 down_write(&sb->s_umount);
                 if (!sb_rdonly(sb))
@@ -1600,8 +1633,13 @@ static int do_umount(struct mount *mnt, int flags)
  
         namespace_lock();
         lock_mount_hash();
-       event++;
  
+       /* Recheck MNT_LOCKED with the locks held */
+       retval = -EINVAL;
+       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+               goto out;
+
+       event++;
         if (flags & MNT_DETACH) {
                 if (!list_empty(&mnt->mnt_list))
                         umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1615,6 +1653,7 @@ static int do_umount(struct mount *mnt, int flags)
                         retval = 0;
                 }
         }
+out:
         unlock_mount_hash();
         namespace_unlock();
         return retval;
@@ -1705,7 +1744,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
                 goto dput_and_out;
         if (!check_mnt(mnt))
                 goto dput_and_out;
-       if (mnt->mnt.mnt_flags & MNT_LOCKED)
+       if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
                 goto dput_and_out;
         retval = -EPERM;
         if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
@@ -1783,8 +1822,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
                 for (s = r; s; s = next_mnt(s, r)) {
                         if (!(flag & CL_COPY_UNBINDABLE) &&
                             IS_MNT_UNBINDABLE(s)) {
-                               s = skip_mnt_tree(s);
-                               continue;
+                               if (s->mnt.mnt_flags & MNT_LOCKED) {
+                                       /* Both unbindable and locked. */
+                                       q = ERR_PTR(-EPERM);
+                                       goto out;
+                               } else {
+                                       s = skip_mnt_tree(s);
+                                       continue;
+                               }
                         }
                         if (!(flag & CL_COPY_MNT_NS_FILE) &&
                             is_mnt_ns_file(s->mnt.mnt_root)) {
@@ -1837,7 +1882,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
  {
         namespace_lock();
         lock_mount_hash();
-       umount_tree(real_mount(mnt), UMOUNT_SYNC);
+       umount_tree(real_mount(mnt), 0);
         unlock_mount_hash();
         namespace_unlock();
  }
@@ -1881,6 +1926,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
         }
         return 0;
  }
+EXPORT_SYMBOL_GPL(iterate_mounts);
  
  static void cleanup_group_ids(struct mount *mnt, struct mount *end)
  {
@@ -2327,7 +2373,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
         down_write(&sb->s_umount);
         if (ms_flags & MS_BIND)
                 err = change_mount_flags(path->mnt, ms_flags);
-       else if (!capable(CAP_SYS_ADMIN))
+       else if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
                 err = -EPERM;
         else
                 err = do_remount_sb(sb, sb_flags, data, 0);
@@ -2809,7 +2855,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
                 mnt_flags |= MNT_NODIRATIME;
         if (flags & MS_STRICTATIME)
                 mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
-       if (flags & SB_RDONLY)
+       if (flags & MS_RDONLY)
                 mnt_flags |= MNT_READONLY;
  
         /* The default atime for remount is preservation */