]> git.proxmox.com Git - mirror_zfs.git/blob - module/os/freebsd/spl/spl_vfs.c
Rename refcount.h to zfs_refcount.h
[mirror_zfs.git] / module / os / freebsd / spl / spl_vfs.c
1 /*
2 * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/kernel.h>
33 #include <sys/systm.h>
34 #include <sys/malloc.h>
35 #include <sys/mount.h>
36 #include <sys/cred.h>
37 #include <sys/vfs.h>
38 #include <sys/priv.h>
39 #include <sys/libkern.h>
40
41 #include <sys/mutex.h>
42 #include <sys/vnode.h>
43 #include <sys/taskq.h>
44
45 #include <sys/ccompat.h>
46
47 MALLOC_DECLARE(M_MOUNT);
48
49 void
50 vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
51 int flags __unused)
52 {
53 struct vfsopt *opt;
54 size_t namesize;
55 int locked;
56
57 if (!(locked = mtx_owned(MNT_MTX(vfsp))))
58 MNT_ILOCK(vfsp);
59
60 if (vfsp->mnt_opt == NULL) {
61 void *opts;
62
63 MNT_IUNLOCK(vfsp);
64 opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
65 MNT_ILOCK(vfsp);
66 if (vfsp->mnt_opt == NULL) {
67 vfsp->mnt_opt = opts;
68 TAILQ_INIT(vfsp->mnt_opt);
69 } else {
70 free(opts, M_MOUNT);
71 }
72 }
73
74 MNT_IUNLOCK(vfsp);
75
76 opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
77 namesize = strlen(name) + 1;
78 opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
79 strlcpy(opt->name, name, namesize);
80 opt->pos = -1;
81 opt->seen = 1;
82 if (arg == NULL) {
83 opt->value = NULL;
84 opt->len = 0;
85 } else {
86 opt->len = strlen(arg) + 1;
87 opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
88 bcopy(arg, opt->value, opt->len);
89 }
90
91 MNT_ILOCK(vfsp);
92 TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
93 if (!locked)
94 MNT_IUNLOCK(vfsp);
95 }
96
97 void
98 vfs_clearmntopt(vfs_t *vfsp, const char *name)
99 {
100 int locked;
101
102 if (!(locked = mtx_owned(MNT_MTX(vfsp))))
103 MNT_ILOCK(vfsp);
104 vfs_deleteopt(vfsp->mnt_opt, name);
105 if (!locked)
106 MNT_IUNLOCK(vfsp);
107 }
108
109 int
110 vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
111 {
112 struct vfsoptlist *opts = vfsp->mnt_optnew;
113 int error;
114
115 if (opts == NULL)
116 return (0);
117 error = vfs_getopt(opts, opt, (void **)argp, NULL);
118 return (error != 0 ? 0 : 1);
119 }
120
121 int
122 mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
123 char *fspec, int fsflags)
124 {
125 struct vfsconf *vfsp;
126 struct mount *mp;
127 vnode_t *vp, *mvp;
128 struct ucred *cr;
129 int error;
130
131 ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");
132
133 vp = *vpp;
134 *vpp = NULL;
135 error = 0;
136
137 /*
138 * Be ultra-paranoid about making sure the type and fspath
139 * variables will fit in our mp buffers, including the
140 * terminating NUL.
141 */
142 if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
143 error = ENAMETOOLONG;
144 if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
145 error = ENODEV;
146 if (error == 0 && vp->v_type != VDIR)
147 error = ENOTDIR;
148 /*
149 * We need vnode lock to protect v_mountedhere and vnode interlock
150 * to protect v_iflag.
151 */
152 if (error == 0) {
153 VI_LOCK(vp);
154 if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
155 vp->v_iflag |= VI_MOUNT;
156 else
157 error = EBUSY;
158 VI_UNLOCK(vp);
159 }
160 if (error != 0) {
161 vput(vp);
162 return (error);
163 }
164 VOP_UNLOCK1(vp);
165
166 /*
167 * Allocate and initialize the filesystem.
168 * We don't want regular user that triggered snapshot mount to be able
169 * to unmount it, so pass credentials of the parent mount.
170 */
171 mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);
172
173 mp->mnt_optnew = NULL;
174 vfs_setmntopt(mp, "from", fspec, 0);
175 mp->mnt_optnew = mp->mnt_opt;
176 mp->mnt_opt = NULL;
177
178 /*
179 * Set the mount level flags.
180 */
181 mp->mnt_flag = fsflags & MNT_UPDATEMASK;
182 /*
183 * Snapshots are always read-only.
184 */
185 mp->mnt_flag |= MNT_RDONLY;
186 /*
187 * We don't want snapshots to allow access to vulnerable setuid
188 * programs, so we turn off setuid when mounting snapshots.
189 */
190 mp->mnt_flag |= MNT_NOSUID;
191 /*
192 * We don't want snapshots to be visible in regular
193 * mount(8) and df(1) output.
194 */
195 mp->mnt_flag |= MNT_IGNORE;
196 /*
197 * XXX: This is evil, but we can't mount a snapshot as a regular user.
198 * XXX: Is is safe when snapshot is mounted from within a jail?
199 */
200 cr = td->td_ucred;
201 td->td_ucred = kcred;
202 error = VFS_MOUNT(mp);
203 td->td_ucred = cr;
204
205 if (error != 0) {
206 /*
207 * Clear VI_MOUNT and decrement the use count "atomically",
208 * under the vnode lock. This is not strictly required,
209 * but makes it easier to reason about the life-cycle and
210 * ownership of the covered vnode.
211 */
212 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
213 VI_LOCK(vp);
214 vp->v_iflag &= ~VI_MOUNT;
215 VI_UNLOCK(vp);
216 vput(vp);
217 vfs_unbusy(mp);
218 vfs_freeopts(mp->mnt_optnew);
219 mp->mnt_vnodecovered = NULL;
220 vfs_mount_destroy(mp);
221 return (error);
222 }
223
224 if (mp->mnt_opt != NULL)
225 vfs_freeopts(mp->mnt_opt);
226 mp->mnt_opt = mp->mnt_optnew;
227 (void) VFS_STATFS(mp, &mp->mnt_stat);
228
229 /*
230 * Prevent external consumers of mount options from reading
231 * mnt_optnew.
232 */
233 mp->mnt_optnew = NULL;
234
235 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
236 #ifdef FREEBSD_NAMECACHE
237 cache_purge(vp);
238 #endif
239 VI_LOCK(vp);
240 vp->v_iflag &= ~VI_MOUNT;
241 VI_UNLOCK(vp);
242
243 vp->v_mountedhere = mp;
244 /* Put the new filesystem on the mount list. */
245 mtx_lock(&mountlist_mtx);
246 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
247 mtx_unlock(&mountlist_mtx);
248 vfs_event_signal(NULL, VQ_MOUNT, 0);
249 if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
250 panic("mount: lost mount");
251 VOP_UNLOCK1(vp);
252 #if __FreeBSD_version >= 1300048
253 vfs_op_exit(mp);
254 #endif
255 vfs_unbusy(mp);
256 *vpp = mvp;
257 return (0);
258 }
259
260 /*
261 * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
262 * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
263 * the file system as a result of releasing the vnode. Note, file systems
264 * already have to handle the race where the vnode is incremented before the
265 * inactive routine is called and does its locking.
266 *
267 * Warning: Excessive use of this routine can lead to performance problems.
268 * This is because taskqs throttle back allocation if too many are created.
269 */
270 void
271 vn_rele_async(vnode_t *vp, taskq_t *taskq)
272 {
273 VERIFY(vp->v_count > 0);
274 if (refcount_release_if_not_last(&vp->v_usecount)) {
275 #if __FreeBSD_version < 1300045
276 vdrop(vp);
277 #endif
278 return;
279 }
280 VERIFY(taskq_dispatch((taskq_t *)taskq,
281 (task_func_t *)vrele, vp, TQ_SLEEP) != 0);
282 }