]> git.proxmox.com Git - mirror_zfs.git/blob - module/os/linux/zfs/zpl_super.c
Fix zpl_test_super race with zfs_umount
[mirror_zfs.git] / module / os / linux / zfs / zpl_super.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
23 * Copyright (c) 2023, Datto Inc. All rights reserved.
24 */
25
26
27 #include <sys/zfs_znode.h>
28 #include <sys/zfs_vfsops.h>
29 #include <sys/zfs_vnops.h>
30 #include <sys/zfs_ctldir.h>
31 #include <sys/zpl.h>
32
33
34 static struct inode *
35 zpl_inode_alloc(struct super_block *sb)
36 {
37 struct inode *ip;
38
39 VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
40 inode_set_iversion(ip, 1);
41
42 return (ip);
43 }
44
45 static void
46 zpl_inode_destroy(struct inode *ip)
47 {
48 ASSERT(atomic_read(&ip->i_count) == 0);
49 zfs_inode_destroy(ip);
50 }
51
52 /*
53 * Called from __mark_inode_dirty() to reflect that something in the
54 * inode has changed. We use it to ensure the znode system attributes
55 * are always strictly update to date with respect to the inode.
56 */
57 #ifdef HAVE_DIRTY_INODE_WITH_FLAGS
58 static void
59 zpl_dirty_inode(struct inode *ip, int flags)
60 {
61 fstrans_cookie_t cookie;
62
63 cookie = spl_fstrans_mark();
64 zfs_dirty_inode(ip, flags);
65 spl_fstrans_unmark(cookie);
66 }
67 #else
68 static void
69 zpl_dirty_inode(struct inode *ip)
70 {
71 fstrans_cookie_t cookie;
72
73 cookie = spl_fstrans_mark();
74 zfs_dirty_inode(ip, 0);
75 spl_fstrans_unmark(cookie);
76 }
77 #endif /* HAVE_DIRTY_INODE_WITH_FLAGS */
78
79 /*
80 * When ->drop_inode() is called its return value indicates if the
81 * inode should be evicted from the inode cache. If the inode is
82 * unhashed and has no links the default policy is to evict it
83 * immediately.
84 *
85 * The ->evict_inode() callback must minimally truncate the inode pages,
86 * and call clear_inode(). For 2.6.35 and later kernels this will
87 * simply update the inode state, with the sync occurring before the
88 * truncate in evict(). For earlier kernels clear_inode() maps to
89 * end_writeback() which is responsible for completing all outstanding
90 * write back. In either case, once this is done it is safe to cleanup
91 * any remaining inode specific data via zfs_inactive().
92 * remaining filesystem specific data.
93 */
94 static void
95 zpl_evict_inode(struct inode *ip)
96 {
97 fstrans_cookie_t cookie;
98
99 cookie = spl_fstrans_mark();
100 truncate_setsize(ip, 0);
101 clear_inode(ip);
102 zfs_inactive(ip);
103 spl_fstrans_unmark(cookie);
104 }
105
106 static void
107 zpl_put_super(struct super_block *sb)
108 {
109 fstrans_cookie_t cookie;
110 int error;
111
112 cookie = spl_fstrans_mark();
113 error = -zfs_umount(sb);
114 spl_fstrans_unmark(cookie);
115 ASSERT3S(error, <=, 0);
116 }
117
118 static int
119 zpl_sync_fs(struct super_block *sb, int wait)
120 {
121 fstrans_cookie_t cookie;
122 cred_t *cr = CRED();
123 int error;
124
125 crhold(cr);
126 cookie = spl_fstrans_mark();
127 error = -zfs_sync(sb, wait, cr);
128 spl_fstrans_unmark(cookie);
129 crfree(cr);
130 ASSERT3S(error, <=, 0);
131
132 return (error);
133 }
134
135 static int
136 zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
137 {
138 fstrans_cookie_t cookie;
139 int error;
140
141 cookie = spl_fstrans_mark();
142 error = -zfs_statvfs(dentry->d_inode, statp);
143 spl_fstrans_unmark(cookie);
144 ASSERT3S(error, <=, 0);
145
146 /*
147 * If required by a 32-bit system call, dynamically scale the
148 * block size up to 16MiB and decrease the block counts. This
149 * allows for a maximum size of 64EiB to be reported. The file
150 * counts must be artificially capped at 2^32-1.
151 */
152 if (unlikely(zpl_is_32bit_api())) {
153 while (statp->f_blocks > UINT32_MAX &&
154 statp->f_bsize < SPA_MAXBLOCKSIZE) {
155 statp->f_frsize <<= 1;
156 statp->f_bsize <<= 1;
157
158 statp->f_blocks >>= 1;
159 statp->f_bfree >>= 1;
160 statp->f_bavail >>= 1;
161 }
162
163 uint64_t usedobjs = statp->f_files - statp->f_ffree;
164 statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs);
165 statp->f_files = statp->f_ffree + usedobjs;
166 }
167
168 return (error);
169 }
170
171 static int
172 zpl_remount_fs(struct super_block *sb, int *flags, char *data)
173 {
174 zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
175 fstrans_cookie_t cookie;
176 int error;
177
178 cookie = spl_fstrans_mark();
179 error = -zfs_remount(sb, flags, &zm);
180 spl_fstrans_unmark(cookie);
181 ASSERT3S(error, <=, 0);
182
183 return (error);
184 }
185
186 static int
187 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
188 {
189 int error;
190 if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
191 return (error);
192
193 char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
194 dmu_objset_name(zfsvfs->z_os, fsname);
195
196 for (int i = 0; fsname[i] != 0; i++) {
197 /*
198 * Spaces in the dataset name must be converted to their
199 * octal escape sequence for getmntent(3) to correctly
200 * parse then fsname portion of /proc/self/mounts.
201 */
202 if (fsname[i] == ' ') {
203 seq_puts(seq, "\\040");
204 } else {
205 seq_putc(seq, fsname[i]);
206 }
207 }
208
209 kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
210
211 zpl_exit(zfsvfs, FTAG);
212
213 return (0);
214 }
215
216 static int
217 zpl_show_devname(struct seq_file *seq, struct dentry *root)
218 {
219 return (__zpl_show_devname(seq, root->d_sb->s_fs_info));
220 }
221
222 static int
223 __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
224 {
225 seq_printf(seq, ",%s",
226 zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
227
228 #ifdef CONFIG_FS_POSIX_ACL
229 switch (zfsvfs->z_acl_type) {
230 case ZFS_ACLTYPE_POSIX:
231 seq_puts(seq, ",posixacl");
232 break;
233 default:
234 seq_puts(seq, ",noacl");
235 break;
236 }
237 #endif /* CONFIG_FS_POSIX_ACL */
238
239 switch (zfsvfs->z_case) {
240 case ZFS_CASE_SENSITIVE:
241 seq_puts(seq, ",casesensitive");
242 break;
243 case ZFS_CASE_INSENSITIVE:
244 seq_puts(seq, ",caseinsensitive");
245 break;
246 default:
247 seq_puts(seq, ",casemixed");
248 break;
249 }
250
251 return (0);
252 }
253
254 static int
255 zpl_show_options(struct seq_file *seq, struct dentry *root)
256 {
257 return (__zpl_show_options(seq, root->d_sb->s_fs_info));
258 }
259
260 static int
261 zpl_fill_super(struct super_block *sb, void *data, int silent)
262 {
263 zfs_mnt_t *zm = (zfs_mnt_t *)data;
264 fstrans_cookie_t cookie;
265 int error;
266
267 cookie = spl_fstrans_mark();
268 error = -zfs_domount(sb, zm, silent);
269 spl_fstrans_unmark(cookie);
270 ASSERT3S(error, <=, 0);
271
272 return (error);
273 }
274
275 static int
276 zpl_test_super(struct super_block *s, void *data)
277 {
278 zfsvfs_t *zfsvfs = s->s_fs_info;
279 objset_t *os = data;
280 /*
281 * If the os doesn't match the z_os in the super_block, assume it is
282 * not a match. Matching would imply a multimount of a dataset. It is
283 * possible that during a multimount, there is a simultaneous operation
284 * that changes the z_os, e.g., rollback, where the match will be
285 * missed, but in that case the user will get an EBUSY.
286 */
287 return (zfsvfs != NULL && os == zfsvfs->z_os);
288 }
289
290 static struct super_block *
291 zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
292 {
293 struct super_block *s;
294 objset_t *os;
295 int err;
296
297 err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
298 if (err)
299 return (ERR_PTR(-err));
300
301 /*
302 * The dsl pool lock must be released prior to calling sget().
303 * It is possible sget() may block on the lock in grab_super()
304 * while deactivate_super() holds that same lock and waits for
305 * a txg sync. If the dsl_pool lock is held over sget()
306 * this can prevent the pool sync and cause a deadlock.
307 */
308 dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
309 dsl_pool_rele(dmu_objset_pool(os), FTAG);
310
311 s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);
312
313 /*
314 * Recheck with the lock held to prevent mounting the wrong dataset
315 * since z_os can be stale when the teardown lock is held.
316 *
317 * We can't do this in zpl_test_super in since it's under spinlock and
318 * also s_umount lock is not held there so it would race with
319 * zfs_umount and zfsvfs can be freed.
320 */
321 if (!IS_ERR(s) && s->s_fs_info != NULL) {
322 zfsvfs_t *zfsvfs = s->s_fs_info;
323 if (zpl_enter(zfsvfs, FTAG) == 0) {
324 if (os != zfsvfs->z_os)
325 err = -SET_ERROR(EBUSY);
326 zpl_exit(zfsvfs, FTAG);
327 } else {
328 err = -SET_ERROR(EBUSY);
329 }
330 }
331 dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
332 dsl_dataset_rele(dmu_objset_ds(os), FTAG);
333
334 if (IS_ERR(s))
335 return (ERR_CAST(s));
336
337 if (err) {
338 deactivate_locked_super(s);
339 return (ERR_PTR(err));
340 }
341
342 if (s->s_root == NULL) {
343 err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
344 if (err) {
345 deactivate_locked_super(s);
346 return (ERR_PTR(err));
347 }
348 s->s_flags |= SB_ACTIVE;
349 } else if ((flags ^ s->s_flags) & SB_RDONLY) {
350 deactivate_locked_super(s);
351 return (ERR_PTR(-EBUSY));
352 }
353
354 return (s);
355 }
356
357 static struct dentry *
358 zpl_mount(struct file_system_type *fs_type, int flags,
359 const char *osname, void *data)
360 {
361 zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
362
363 struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
364 if (IS_ERR(sb))
365 return (ERR_CAST(sb));
366
367 return (dget(sb->s_root));
368 }
369
/*
 * ->kill_sb() callback: run zfs_preumount() on the super block and then
 * release it with kill_anon_super().
 */
static void
zpl_kill_sb(struct super_block *sb)
{
	zfs_preumount(sb);
	kill_anon_super(sb);
}
376
377 void
378 zpl_prune_sb(int64_t nr_to_scan, void *arg)
379 {
380 struct super_block *sb = (struct super_block *)arg;
381 int objects = 0;
382
383 (void) -zfs_prune(sb, nr_to_scan, &objects);
384 }
385
386 const struct super_operations zpl_super_operations = {
387 .alloc_inode = zpl_inode_alloc,
388 .destroy_inode = zpl_inode_destroy,
389 .dirty_inode = zpl_dirty_inode,
390 .write_inode = NULL,
391 .evict_inode = zpl_evict_inode,
392 .put_super = zpl_put_super,
393 .sync_fs = zpl_sync_fs,
394 .statfs = zpl_statfs,
395 .remount_fs = zpl_remount_fs,
396 .show_devname = zpl_show_devname,
397 .show_options = zpl_show_options,
398 .show_stats = NULL,
399 };
400
401 struct file_system_type zpl_fs_type = {
402 .owner = THIS_MODULE,
403 .name = ZFS_DRIVER,
404 #if defined(HAVE_IDMAP_MNT_API)
405 .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
406 #else
407 .fs_flags = FS_USERNS_MOUNT,
408 #endif
409 .mount = zpl_mount,
410 .kill_sb = zpl_kill_sb,
411 };