module/os/freebsd/spl/spl_vfs.c

   1 /*
   2  * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26
  27 #include <sys/cdefs.h>
  28 __FBSDID("$FreeBSD$");
  29
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/kernel.h>
  33 #include <sys/systm.h>
  34 #include <sys/malloc.h>
  35 #include <sys/mount.h>
  36 #include <sys/cred.h>
  37 #include <sys/vfs.h>
  38 #include <sys/priv.h>
  39 #include <sys/libkern.h>
  40
  41 #include <sys/mutex.h>
  42 #include <sys/vnode.h>
  43 #include <sys/taskq.h>
  44
  45 #include <sys/ccompat.h>
  46
  47 MALLOC_DECLARE(M_MOUNT);
  48
  49 void
  50 vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
  51     int flags __unused)
  52 {
  53         struct vfsopt *opt;
  54         size_t namesize;
  55         int locked;
  56
  57         if (!(locked = mtx_owned(MNT_MTX(vfsp))))
  58                 MNT_ILOCK(vfsp);
  59
  60         if (vfsp->mnt_opt == NULL) {
  61                 void *opts;
  62
  63                 MNT_IUNLOCK(vfsp);
  64                 opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
  65                 MNT_ILOCK(vfsp);
  66                 if (vfsp->mnt_opt == NULL) {
  67                         vfsp->mnt_opt = opts;
  68                         TAILQ_INIT(vfsp->mnt_opt);
  69                 } else {
  70                         free(opts, M_MOUNT);
  71                 }
  72         }
  73
  74         MNT_IUNLOCK(vfsp);
  75
  76         opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
  77         namesize = strlen(name) + 1;
  78         opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
  79         strlcpy(opt->name, name, namesize);
  80         opt->pos = -1;
  81         opt->seen = 1;
  82         if (arg == NULL) {
  83                 opt->value = NULL;
  84                 opt->len = 0;
  85         } else {
  86                 opt->len = strlen(arg) + 1;
  87                 opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  88                 bcopy(arg, opt->value, opt->len);
  89         }
  90
  91         MNT_ILOCK(vfsp);
  92         TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
  93         if (!locked)
  94                 MNT_IUNLOCK(vfsp);
  95 }
  96
  97 void
  98 vfs_clearmntopt(vfs_t *vfsp, const char *name)
  99 {
 100         int locked;
 101
 102         if (!(locked = mtx_owned(MNT_MTX(vfsp))))
 103                 MNT_ILOCK(vfsp);
 104         vfs_deleteopt(vfsp->mnt_opt, name);
 105         if (!locked)
 106                 MNT_IUNLOCK(vfsp);
 107 }
 108
 109 int
 110 vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
 111 {
 112         struct vfsoptlist *opts = vfsp->mnt_optnew;
 113         int error;
 114
 115         if (opts == NULL)
 116                 return (0);
 117         error = vfs_getopt(opts, opt, (void **)argp, NULL);
 118         return (error != 0 ? 0 : 1);
 119 }
 120
 121 int
 122 mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
 123     char *fspec, int fsflags)
 124 {
 125         struct vfsconf *vfsp;
 126         struct mount *mp;
 127         vnode_t *vp, *mvp;
 128         struct ucred *cr;
 129         int error;
 130
 131         ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");
 132
 133         vp = *vpp;
 134         *vpp = NULL;
 135         error = 0;
 136
 137         /*
 138          * Be ultra-paranoid about making sure the type and fspath
 139          * variables will fit in our mp buffers, including the
 140          * terminating NUL.
 141          */
 142         if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
 143                 error = ENAMETOOLONG;
 144         if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
 145                 error = ENODEV;
 146         if (error == 0 && vp->v_type != VDIR)
 147                 error = ENOTDIR;
 148         /*
 149          * We need vnode lock to protect v_mountedhere and vnode interlock
 150          * to protect v_iflag.
 151          */
 152         if (error == 0) {
 153                 VI_LOCK(vp);
 154                 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
 155                         vp->v_iflag |= VI_MOUNT;
 156                 else
 157                         error = EBUSY;
 158                 VI_UNLOCK(vp);
 159         }
 160         if (error != 0) {
 161                 vput(vp);
 162                 return (error);
 163         }
 164         VOP_UNLOCK1(vp);
 165
 166         /*
 167          * Allocate and initialize the filesystem.
 168          * We don't want regular user that triggered snapshot mount to be able
 169          * to unmount it, so pass credentials of the parent mount.
 170          */
 171         mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);
 172
 173         mp->mnt_optnew = NULL;
 174         vfs_setmntopt(mp, "from", fspec, 0);
 175         mp->mnt_optnew = mp->mnt_opt;
 176         mp->mnt_opt = NULL;
 177
 178         /*
 179          * Set the mount level flags.
 180          */
 181         mp->mnt_flag = fsflags & MNT_UPDATEMASK;
 182         /*
 183          * Snapshots are always read-only.
 184          */
 185         mp->mnt_flag |= MNT_RDONLY;
 186         /*
 187          * We don't want snapshots to allow access to vulnerable setuid
 188          * programs, so we turn off setuid when mounting snapshots.
 189          */
 190         mp->mnt_flag |= MNT_NOSUID;
 191         /*
 192          * We don't want snapshots to be visible in regular
 193          * mount(8) and df(1) output.
 194          */
 195         mp->mnt_flag |= MNT_IGNORE;
 196         /*
 197          * XXX: This is evil, but we can't mount a snapshot as a regular user.
 198          * XXX: Is is safe when snapshot is mounted from within a jail?
 199          */
 200         cr = td->td_ucred;
 201         td->td_ucred = kcred;
 202         error = VFS_MOUNT(mp);
 203         td->td_ucred = cr;
 204
 205         if (error != 0) {
 206                 /*
 207                  * Clear VI_MOUNT and decrement the use count "atomically",
 208                  * under the vnode lock.  This is not strictly required,
 209                  * but makes it easier to reason about the life-cycle and
 210                  * ownership of the covered vnode.
 211                  */
 212                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 213                 VI_LOCK(vp);
 214                 vp->v_iflag &= ~VI_MOUNT;
 215                 VI_UNLOCK(vp);
 216                 vput(vp);
 217                 vfs_unbusy(mp);
 218                 vfs_freeopts(mp->mnt_optnew);
 219                 mp->mnt_vnodecovered = NULL;
 220                 vfs_mount_destroy(mp);
 221                 return (error);
 222         }
 223
 224         if (mp->mnt_opt != NULL)
 225                 vfs_freeopts(mp->mnt_opt);
 226         mp->mnt_opt = mp->mnt_optnew;
 227         (void) VFS_STATFS(mp, &mp->mnt_stat);
 228
 229         /*
 230          * Prevent external consumers of mount options from reading
 231          * mnt_optnew.
 232          */
 233         mp->mnt_optnew = NULL;
 234
 235         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 236 #ifdef FREEBSD_NAMECACHE
 237         cache_purge(vp);
 238 #endif
 239         VI_LOCK(vp);
 240         vp->v_iflag &= ~VI_MOUNT;
 241         VI_UNLOCK(vp);
 242
 243         vp->v_mountedhere = mp;
 244         /* Put the new filesystem on the mount list. */
 245         mtx_lock(&mountlist_mtx);
 246         TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
 247         mtx_unlock(&mountlist_mtx);
 248         vfs_event_signal(NULL, VQ_MOUNT, 0);
 249         if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
 250                 panic("mount: lost mount");
 251         VOP_UNLOCK1(vp);
 252 #if __FreeBSD_version >= 1300048
 253         vfs_op_exit(mp);
 254 #endif
 255         vfs_unbusy(mp);
 256         *vpp = mvp;
 257         return (0);
 258 }
 259
 260 /*
 261  * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
 262  * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
 263  * the file system as a result of releasing the vnode. Note, file systems
 264  * already have to handle the race where the vnode is incremented before the
 265  * inactive routine is called and does its locking.
 266  *
 267  * Warning: Excessive use of this routine can lead to performance problems.
 268  * This is because taskqs throttle back allocation if too many are created.
 269  */
 270 void
 271 vn_rele_async(vnode_t *vp, taskq_t *taskq)
 272 {
 273         VERIFY(vp->v_count > 0);
 274         if (refcount_release_if_not_last(&vp->v_usecount)) {
 275 #if __FreeBSD_version < 1300045
 276                 vdrop(vp);
 277 #endif
 278                 return;
 279         }
 280         VERIFY(taskq_dispatch((taskq_t *)taskq,
 281             (task_func_t *)vrele, vp, TQ_SLEEP) != 0);
 282 }