module/os/freebsd/spl/spl_vfs.c

   1 /*
   2  * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/kernel.h>
  30 #include <sys/systm.h>
  31 #include <sys/malloc.h>
  32 #include <sys/mount.h>
  33 #include <sys/cred.h>
  34 #include <sys/vfs.h>
  35 #include <sys/priv.h>
  36 #include <sys/libkern.h>
  37
  38 #include <sys/mutex.h>
  39 #include <sys/vnode.h>
  40 #include <sys/taskq.h>
  41
  42 #include <sys/ccompat.h>
  43
/*
 * Declare the M_MOUNT malloc type; it tags the mount-option allocations
 * made in vfs_setmntopt().  The type itself is defined outside this file.
 */
MALLOC_DECLARE(M_MOUNT);
  45
  46 void
  47 vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
  48     int flags __unused)
  49 {
  50         struct vfsopt *opt;
  51         size_t namesize;
  52         int locked;
  53
  54         if (!(locked = mtx_owned(MNT_MTX(vfsp))))
  55                 MNT_ILOCK(vfsp);
  56
  57         if (vfsp->mnt_opt == NULL) {
  58                 void *opts;
  59
  60                 MNT_IUNLOCK(vfsp);
  61                 opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
  62                 MNT_ILOCK(vfsp);
  63                 if (vfsp->mnt_opt == NULL) {
  64                         vfsp->mnt_opt = opts;
  65                         TAILQ_INIT(vfsp->mnt_opt);
  66                 } else {
  67                         free(opts, M_MOUNT);
  68                 }
  69         }
  70
  71         MNT_IUNLOCK(vfsp);
  72
  73         opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
  74         namesize = strlen(name) + 1;
  75         opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
  76         strlcpy(opt->name, name, namesize);
  77         opt->pos = -1;
  78         opt->seen = 1;
  79         if (arg == NULL) {
  80                 opt->value = NULL;
  81                 opt->len = 0;
  82         } else {
  83                 opt->len = strlen(arg) + 1;
  84                 opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
  85                 memcpy(opt->value, arg, opt->len);
  86         }
  87
  88         MNT_ILOCK(vfsp);
  89         TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
  90         if (!locked)
  91                 MNT_IUNLOCK(vfsp);
  92 }
  93
  94 void
  95 vfs_clearmntopt(vfs_t *vfsp, const char *name)
  96 {
  97         int locked;
  98
  99         if (!(locked = mtx_owned(MNT_MTX(vfsp))))
 100                 MNT_ILOCK(vfsp);
 101         vfs_deleteopt(vfsp->mnt_opt, name);
 102         if (!locked)
 103                 MNT_IUNLOCK(vfsp);
 104 }
 105
 106 int
 107 vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
 108 {
 109         struct vfsoptlist *opts = vfsp->mnt_optnew;
 110         int error;
 111
 112         if (opts == NULL)
 113                 return (0);
 114         error = vfs_getopt(opts, opt, (void **)argp, NULL);
 115         return (error != 0 ? 0 : 1);
 116 }
 117
/*
 * Mount a file system of type "fstype" on top of the directory vnode *vpp,
 * using "fspath" as the mount point path and recording "fspec" as the
 * mount's "from" option.
 *
 * On entry *vpp must be exclusively locked (asserted below); the pointer is
 * cleared immediately.  On success *vpp is set to the vnode returned by
 * VFS_ROOT(mp, LK_EXCLUSIVE, ...) and 0 is returned.  On failure the covered
 * vnode is released with vput(), *vpp stays NULL and an errno value is
 * returned: ENAMETOOLONG, ENODEV, ENOTDIR, EBUSY, or whatever VFS_MOUNT()
 * reported.
 *
 * The new mount always carries MNT_RDONLY | MNT_NOSUID | MNT_IGNORE in
 * addition to the MNT_UPDATEMASK bits of "fsflags".  "td" is passed to
 * vfs_byname_kld().  "parent_vfsp" is only consulted when
 * VFS_SUPPORTS_EXJAIL_CLONE is defined.
 */
int
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
    char *fspec, int fsflags, vfs_t *parent_vfsp)
{
        struct vfsconf *vfsp;
        struct mount *mp;
        vnode_t *vp, *mvp;
        int error;

        ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");

        /* Take over the caller's vnode; *vpp is only refilled on success. */
        vp = *vpp;
        *vpp = NULL;
        error = 0;

        /*
         * Be ultra-paranoid about making sure the type and fspath
         * variables will fit in our mp buffers, including the
         * terminating NUL.
         */
        if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
                error = ENAMETOOLONG;
        /* A NULL result is always reported to the caller as ENODEV. */
        if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
                error = ENODEV;
        if (error == 0 && vp->v_type != VDIR)
                error = ENOTDIR;
        /*
         * We need vnode lock to protect v_mountedhere and vnode interlock
         * to protect v_iflag.
         */
        if (error == 0) {
                VI_LOCK(vp);
                if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
                        vp->v_iflag |= VI_MOUNT;
                else
                        error = EBUSY;
                VI_UNLOCK(vp);
        }
        if (error != 0) {
                vput(vp);
                return (error);
        }
        vn_seqc_write_begin(vp);
        VOP_UNLOCK1(vp);

        /*
         * Allocate and initialize the filesystem.
         * We don't want regular user that triggered snapshot mount to be able
         * to unmount it, so pass credentials of the parent mount.
         */
        mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);

        /*
         * vfs_setmntopt() appends to mnt_opt, so build the "from" option
         * there and then move the list to mnt_optnew before VFS_MOUNT().
         */
        mp->mnt_optnew = NULL;
        vfs_setmntopt(mp, "from", fspec, 0);
        mp->mnt_optnew = mp->mnt_opt;
        mp->mnt_opt = NULL;

        /*
         * Set the mount level flags.
         */
        mp->mnt_flag = fsflags & MNT_UPDATEMASK;
        /*
         * Snapshots are always read-only.
         */
        mp->mnt_flag |= MNT_RDONLY;
        /*
         * We don't want snapshots to allow access to vulnerable setuid
         * programs, so we turn off setuid when mounting snapshots.
         */
        mp->mnt_flag |= MNT_NOSUID;
        /*
         * We don't want snapshots to be visible in regular
         * mount(8) and df(1) output.
         */
        mp->mnt_flag |= MNT_IGNORE;

        error = VFS_MOUNT(mp);
        if (error != 0) {
                /*
                 * Clear VI_MOUNT and decrement the use count "atomically",
                 * under the vnode lock.  This is not strictly required,
                 * but makes it easier to reason about the life-cycle and
                 * ownership of the covered vnode.
                 */
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
                VI_LOCK(vp);
                vp->v_iflag &= ~VI_MOUNT;
                VI_UNLOCK(vp);
                vn_seqc_write_end(vp);
                vput(vp);
                /* Tear down the half-built mount and its option list. */
                vfs_unbusy(mp);
                vfs_freeopts(mp->mnt_optnew);
                mp->mnt_vnodecovered = NULL;
                vfs_mount_destroy(mp);
                return (error);
        }

        /*
         * The mount succeeded: make the mnt_optnew list the mount's
         * mnt_opt list, freeing any list already stored in mnt_opt.
         */
        if (mp->mnt_opt != NULL)
                vfs_freeopts(mp->mnt_opt);
        mp->mnt_opt = mp->mnt_optnew;
        (void) VFS_STATFS(mp, &mp->mnt_stat);

#ifdef VFS_SUPPORTS_EXJAIL_CLONE
        /*
         * Clone the mnt_exjail credentials of the parent, as required.
         */
        vfs_exjail_clone(parent_vfsp, mp);
#endif

        /*
         * Prevent external consumers of mount options from reading
         * mnt_optnew.
         */
        mp->mnt_optnew = NULL;

        /*
         * Retake the vnode lock and, under the interlock, swap the
         * in-progress VI_MOUNT marker for the real v_mountedhere link.
         */
        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef FREEBSD_NAMECACHE
        cache_purge(vp);
#endif
        VI_LOCK(vp);
        vp->v_iflag &= ~VI_MOUNT;
#ifdef VIRF_MOUNTPOINT
        vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
#endif
        vp->v_mountedhere = mp;
        VI_UNLOCK(vp);
        /* Put the new filesystem on the mount list. */
        mtx_lock(&mountlist_mtx);
        TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
        mtx_unlock(&mountlist_mtx);
        vfs_event_signal(NULL, VQ_MOUNT, 0);
        /* Failing to get the root vnode of the new mount is fatal. */
        if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
                panic("mount: lost mount");
        vn_seqc_write_end(vp);
        VOP_UNLOCK1(vp);
#if __FreeBSD_version >= 1300048
        vfs_op_exit(mp);
#endif
        vfs_unbusy(mp);
        /* Hand the root vnode obtained from VFS_ROOT() back to the caller. */
        *vpp = mvp;
        return (0);
}
 260
 261 /*
 262  * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
 263  * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
 264  * the file system as a result of releasing the vnode. Note, file systems
 265  * already have to handle the race where the vnode is incremented before the
 266  * inactive routine is called and does its locking.
 267  *
 268  * Warning: Excessive use of this routine can lead to performance problems.
 269  * This is because taskqs throttle back allocation if too many are created.
 270  */
 271 void
 272 vn_rele_async(vnode_t *vp, taskq_t *taskq)
 273 {
 274         VERIFY3U(vp->v_usecount, >, 0);
 275         if (refcount_release_if_not_last(&vp->v_usecount)) {
 276 #if __FreeBSD_version < 1300045
 277                 vdrop(vp);
 278 #endif
 279                 return;
 280         }
 281         VERIFY3U(taskq_dispatch((taskq_t *)taskq,
 282             (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0);
 283 }