]> git.proxmox.com Git - mirror_zfs-debian.git/blob - module/zfs/zfs_ctldir.c
b5c4510fe8ded8e47ed4375e22919cffeb1dc533
[mirror_zfs-debian.git] / module / zfs / zfs_ctldir.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 *
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
25 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
26 * LLNL-CODE-403049.
27 * Rewritten for Linux by:
28 * Rohan Puri <rohan.puri15@gmail.com>
29 * Brian Behlendorf <behlendorf1@llnl.gov>
30 */
31
32 /*
33 * ZFS control directory (a.k.a. ".zfs")
34 *
35 * This directory provides a common location for all ZFS meta-objects.
36 * Currently, this is only the 'snapshot' and 'shares' directories, but this may
37 * expand in the future. The elements are built dynamically, as the hierarchy
38 * does not actually exist on disk.
39 *
40 * For 'snapshot', we don't want to have all snapshots always mounted, because
41 * this would take up a huge amount of space in /etc/mnttab. We have three
42 * types of objects:
43 *
44 * ctldir ------> snapshotdir -------> snapshot
45 * |
46 * |
47 * V
48 * mounted fs
49 *
50 * The 'snapshot' node contains just enough information to lookup '..' and act
51 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
52 * perform an automount of the underlying filesystem and return the
53 * corresponding inode.
54 *
55 * All mounts are handled automatically by a user mode helper which invokes
56 * the mount procedure. Unmounts are handled by allowing the mount
57 * point to expire so the kernel may automatically unmount it.
58 *
59 * The '.zfs', '.zfs/snapshot', and all directories created under
60 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
61 * zfs_sb_t as the head filesystem (what '.zfs' lives under).
62 *
63 * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
64 * (ie: snapshots) are complete ZFS filesystems and have their own unique
65 * zfs_sb_t. However, the fsid reported by these mounts will be the same
66 * as that used by the parent zfs_sb_t to make NFS happy.
67 */
68
69 #include <sys/types.h>
70 #include <sys/param.h>
71 #include <sys/time.h>
72 #include <sys/systm.h>
73 #include <sys/sysmacros.h>
74 #include <sys/pathname.h>
75 #include <sys/vfs.h>
76 #include <sys/vfs_opreg.h>
77 #include <sys/zfs_ctldir.h>
78 #include <sys/zfs_ioctl.h>
79 #include <sys/zfs_vfsops.h>
80 #include <sys/zfs_vnops.h>
81 #include <sys/stat.h>
82 #include <sys/dmu.h>
83 #include <sys/dsl_deleg.h>
84 #include <sys/mount.h>
85 #include <sys/zpl.h>
86 #include "zfs_namecheck.h"
87
88 /*
89 * Control Directory Tunables (.zfs)
90 */
91 int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
92
93 /*
94 * Dedicated task queue for unmounting snapshots.
95 */
96 static taskq_t *zfs_expire_taskq;
97
98 static zfs_snapentry_t *
99 zfsctl_sep_alloc(void)
100 {
101 return kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP);
102 }
103
104 void
105 zfsctl_sep_free(zfs_snapentry_t *sep)
106 {
107 kmem_free(sep->se_name, MAXNAMELEN);
108 kmem_free(sep->se_path, PATH_MAX);
109 kmem_free(sep, sizeof (zfs_snapentry_t));
110 }
111
112 /*
113 * Attempt to expire an automounted snapshot, unmounts are attempted every
114 * 'zfs_expire_snapshot' seconds until they succeed. The work request is
115 * responsible for rescheduling itself and freeing the zfs_expire_snapshot_t.
116 */
117 static void
118 zfsctl_expire_snapshot(void *data)
119 {
120 zfs_snapentry_t *sep = (zfs_snapentry_t *)data;
121 zfs_sb_t *zsb = ITOZSB(sep->se_inode);
122 int error;
123
124 error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE);
125 if (error == EBUSY)
126 sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
127 zfsctl_expire_snapshot, sep, TQ_SLEEP,
128 ddi_get_lbolt() + zfs_expire_snapshot * HZ);
129 }
130
131 int
132 snapentry_compare(const void *a, const void *b)
133 {
134 const zfs_snapentry_t *sa = a;
135 const zfs_snapentry_t *sb = b;
136 int ret = strcmp(sa->se_name, sb->se_name);
137
138 if (ret < 0)
139 return (-1);
140 else if (ret > 0)
141 return (1);
142 else
143 return (0);
144 }
145
146 boolean_t
147 zfsctl_is_node(struct inode *ip)
148 {
149 return (ITOZ(ip)->z_is_ctldir);
150 }
151
152 boolean_t
153 zfsctl_is_snapdir(struct inode *ip)
154 {
155 return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS));
156 }
157
158 /*
159 * Allocate a new inode with the passed id and ops.
160 */
161 static struct inode *
162 zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
163 const struct file_operations *fops, const struct inode_operations *ops)
164 {
165 struct timespec now = current_fs_time(zsb->z_sb);
166 struct inode *ip;
167 znode_t *zp;
168
169 ip = new_inode(zsb->z_sb);
170 if (ip == NULL)
171 return (NULL);
172
173 zp = ITOZ(ip);
174 ASSERT3P(zp->z_dirlocks, ==, NULL);
175 ASSERT3P(zp->z_acl_cached, ==, NULL);
176 ASSERT3P(zp->z_xattr_cached, ==, NULL);
177 zp->z_id = id;
178 zp->z_unlinked = 0;
179 zp->z_atime_dirty = 0;
180 zp->z_zn_prefetch = 0;
181 zp->z_moved = 0;
182 zp->z_sa_hdl = NULL;
183 zp->z_blksz = 0;
184 zp->z_seq = 0;
185 zp->z_mapcnt = 0;
186 zp->z_gen = 0;
187 zp->z_size = 0;
188 zp->z_atime[0] = 0;
189 zp->z_atime[1] = 0;
190 zp->z_links = 0;
191 zp->z_pflags = 0;
192 zp->z_uid = 0;
193 zp->z_gid = 0;
194 zp->z_mode = 0;
195 zp->z_sync_cnt = 0;
196 zp->z_is_zvol = B_FALSE;
197 zp->z_is_mapped = B_FALSE;
198 zp->z_is_ctldir = B_TRUE;
199 zp->z_is_sa = B_FALSE;
200 ip->i_ino = id;
201 ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
202 ip->i_uid = 0;
203 ip->i_gid = 0;
204 ip->i_blkbits = SPA_MINBLOCKSHIFT;
205 ip->i_atime = now;
206 ip->i_mtime = now;
207 ip->i_ctime = now;
208 ip->i_fop = fops;
209 ip->i_op = ops;
210
211 if (insert_inode_locked(ip)) {
212 unlock_new_inode(ip);
213 iput(ip);
214 return (NULL);
215 }
216
217 mutex_enter(&zsb->z_znodes_lock);
218 list_insert_tail(&zsb->z_all_znodes, zp);
219 zsb->z_nr_znodes++;
220 membar_producer();
221 mutex_exit(&zsb->z_znodes_lock);
222
223 unlock_new_inode(ip);
224
225 return (ip);
226 }
227
228 /*
229 * Lookup the inode with given id, it will be allocated if needed.
230 */
231 static struct inode *
232 zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id,
233 const struct file_operations *fops, const struct inode_operations *ops)
234 {
235 struct inode *ip = NULL;
236
237 while (ip == NULL) {
238 ip = ilookup(zsb->z_sb, (unsigned long)id);
239 if (ip)
240 break;
241
242 /* May fail due to concurrent zfsctl_inode_alloc() */
243 ip = zfsctl_inode_alloc(zsb, id, fops, ops);
244 }
245
246 return (ip);
247 }
248
/*
 * Hook for releasing zfsctl specific inode state.  There is no such state
 * at present, so this is intentionally a no-op.
 */
void
zfsctl_inode_destroy(struct inode *ip)
{
	/* Nothing to release. */
}
257
/*
 * Called when an inode is being evicted from the cache.  Only snapshot
 * directory inodes need additional handling.
 */
void
zfsctl_inode_inactive(struct inode *ip)
{
	if (!zfsctl_is_snapdir(ip))
		return;

	zfsctl_snapdir_inactive(ip);
}
267
268 /*
269 * Create the '.zfs' directory. This directory is cached as part of the VFS
270 * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount()
271 * therefore checks against a vfs_count of 2 instead of 1. This reference
272 * is removed when the ctldir is destroyed in the unmount. All other entities
273 * under the '.zfs' directory are created dynamically as needed.
274 *
275 * Because the dynamically created '.zfs' directory entries assume the use
276 * of 64-bit inode numbers this support must be disabled on 32-bit systems.
277 */
278 int
279 zfsctl_create(zfs_sb_t *zsb)
280 {
281 #if defined(CONFIG_64BIT)
282 ASSERT(zsb->z_ctldir == NULL);
283
284 zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT,
285 &zpl_fops_root, &zpl_ops_root);
286 if (zsb->z_ctldir == NULL)
287 return (ENOENT);
288
289 return (0);
290 #else
291 return (EOPNOTSUPP);
292 #endif /* CONFIG_64BIT */
293 }
294
295 /*
296 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
297 */
298 void
299 zfsctl_destroy(zfs_sb_t *zsb)
300 {
301 iput(zsb->z_ctldir);
302 zsb->z_ctldir = NULL;
303 }
304
305 /*
306 * Given a root znode, retrieve the associated .zfs directory.
307 * Add a hold to the vnode and return it.
308 */
309 struct inode *
310 zfsctl_root(znode_t *zp)
311 {
312 ASSERT(zfs_has_ctldir(zp));
313 igrab(ZTOZSB(zp)->z_ctldir);
314 return (ZTOZSB(zp)->z_ctldir);
315 }
316
317 /*ARGSUSED*/
318 int
319 zfsctl_fid(struct inode *ip, fid_t *fidp)
320 {
321 znode_t *zp = ITOZ(ip);
322 zfs_sb_t *zsb = ITOZSB(ip);
323 uint64_t object = zp->z_id;
324 zfid_short_t *zfid;
325 int i;
326
327 ZFS_ENTER(zsb);
328
329 if (fidp->fid_len < SHORT_FID_LEN) {
330 fidp->fid_len = SHORT_FID_LEN;
331 ZFS_EXIT(zsb);
332 return (ENOSPC);
333 }
334
335 zfid = (zfid_short_t *)fidp;
336
337 zfid->zf_len = SHORT_FID_LEN;
338
339 for (i = 0; i < sizeof (zfid->zf_object); i++)
340 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
341
342 /* .zfs znodes always have a generation number of 0 */
343 for (i = 0; i < sizeof (zfid->zf_gen); i++)
344 zfid->zf_gen[i] = 0;
345
346 ZFS_EXIT(zsb);
347 return (0);
348 }
349
350 static int
351 zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname)
352 {
353 objset_t *os = ITOZSB(ip)->z_os;
354
355 if (snapshot_namecheck(name, NULL, NULL) != 0)
356 return (EILSEQ);
357
358 dmu_objset_name(os, zname);
359 if ((strlen(zname) + 1 + strlen(name)) >= len)
360 return (ENAMETOOLONG);
361
362 (void) strcat(zname, "@");
363 (void) strcat(zname, name);
364
365 return (0);
366 }
367
368 static int
369 zfsctl_snapshot_zpath(struct path *path, int len, char *zpath)
370 {
371 char *path_buffer, *path_ptr;
372 int path_len, error = 0;
373
374 path_buffer = kmem_alloc(len, KM_SLEEP);
375
376 path_ptr = d_path(path, path_buffer, len);
377 if (IS_ERR(path_ptr)) {
378 error = -PTR_ERR(path_ptr);
379 goto out;
380 }
381
382 path_len = path_buffer + len - 1 - path_ptr;
383 if (path_len > len) {
384 error = EFAULT;
385 goto out;
386 }
387
388 memcpy(zpath, path_ptr, path_len);
389 zpath[path_len] = '\0';
390 out:
391 kmem_free(path_buffer, len);
392
393 return (error);
394 }
395
396 /*
397 * Special case the handling of "..".
398 */
399 /* ARGSUSED */
400 int
401 zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp,
402 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
403 {
404 zfs_sb_t *zsb = ITOZSB(dip);
405 int error = 0;
406
407 ZFS_ENTER(zsb);
408
409 if (strcmp(name, "..") == 0) {
410 *ipp = dip->i_sb->s_root->d_inode;
411 } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) {
412 *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR,
413 &zpl_fops_snapdir, &zpl_ops_snapdir);
414 } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) {
415 *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES,
416 &zpl_fops_shares, &zpl_ops_shares);
417 } else {
418 *ipp = NULL;
419 }
420
421 if (*ipp == NULL)
422 error = ENOENT;
423
424 ZFS_EXIT(zsb);
425
426 return (error);
427 }
428
429 /*
430 * Lookup entry point for the 'snapshot' directory. Try to open the
431 * snapshot if it exist, creating the pseudo filesystem inode as necessary.
432 * Perform a mount of the associated dataset on top of the inode.
433 */
434 /* ARGSUSED */
435 int
436 zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp,
437 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
438 {
439 zfs_sb_t *zsb = ITOZSB(dip);
440 uint64_t id;
441 int error;
442
443 ZFS_ENTER(zsb);
444
445 error = dmu_snapshot_id(zsb->z_os, name, &id);
446 if (error) {
447 ZFS_EXIT(zsb);
448 return (error);
449 }
450
451 *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id,
452 &simple_dir_operations, &simple_dir_inode_operations);
453 if (*ipp) {
454 #ifdef HAVE_AUTOMOUNT
455 (*ipp)->i_flags |= S_AUTOMOUNT;
456 #endif /* HAVE_AUTOMOUNT */
457 } else {
458 error = ENOENT;
459 }
460
461 ZFS_EXIT(zsb);
462
463 return (error);
464 }
465
466 static void
467 zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name)
468 {
469 avl_index_t where;
470
471 ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock));
472 ASSERT(sep != NULL);
473
474 /*
475 * Change the name in the AVL tree.
476 */
477 avl_remove(&zsb->z_ctldir_snaps, sep);
478 (void) strcpy(sep->se_name, name);
479 VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL);
480 avl_insert(&zsb->z_ctldir_snaps, sep, where);
481 }
482
483 /*
484 * Renaming a directory under '.zfs/snapshot' will automatically trigger
485 * a rename of the snapshot to the new given name. The rename is confined
486 * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere.
487 */
488 /*ARGSUSED*/
489 int
490 zfsctl_snapdir_rename(struct inode *sdip, char *sname,
491 struct inode *tdip, char *tname, cred_t *cr, int flags)
492 {
493 zfs_sb_t *zsb = ITOZSB(sdip);
494 zfs_snapentry_t search, *sep;
495 avl_index_t where;
496 char *to, *from, *real;
497 int error;
498
499 ZFS_ENTER(zsb);
500
501 to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
502 from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
503 real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
504
505 if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
506 error = dmu_snapshot_realname(zsb->z_os, sname, real,
507 MAXNAMELEN, NULL);
508 if (error == 0) {
509 sname = real;
510 } else if (error != ENOTSUP) {
511 goto out;
512 }
513 }
514
515 error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from);
516 if (!error)
517 error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to);
518 if (!error)
519 error = zfs_secpolicy_rename_perms(from, to, cr);
520 if (error)
521 goto out;
522
523 /*
524 * Cannot move snapshots out of the snapdir.
525 */
526 if (sdip != tdip) {
527 error = EINVAL;
528 goto out;
529 }
530
531 /*
532 * No-op when names are identical.
533 */
534 if (strcmp(sname, tname) == 0) {
535 error = 0;
536 goto out;
537 }
538
539 mutex_enter(&zsb->z_ctldir_lock);
540
541 error = dmu_objset_rename(from, to, B_FALSE);
542 if (error)
543 goto out_unlock;
544
545 search.se_name = (char *)sname;
546 sep = avl_find(&zsb->z_ctldir_snaps, &search, &where);
547 if (sep)
548 zfsctl_rename_snap(zsb, sep, tname);
549
550 out_unlock:
551 mutex_exit(&zsb->z_ctldir_lock);
552 out:
553 kmem_free(from, MAXNAMELEN);
554 kmem_free(to, MAXNAMELEN);
555 kmem_free(real, MAXNAMELEN);
556
557 ZFS_EXIT(zsb);
558
559 return (error);
560 }
561
562 /*
563 * Removing a directory under '.zfs/snapshot' will automatically trigger
564 * the removal of the snapshot with the given name.
565 */
566 /* ARGSUSED */
567 int
568 zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
569 {
570 zfs_sb_t *zsb = ITOZSB(dip);
571 char *snapname, *real;
572 int error;
573
574 ZFS_ENTER(zsb);
575
576 snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
577 real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
578
579 if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
580 error = dmu_snapshot_realname(zsb->z_os, name, real,
581 MAXNAMELEN, NULL);
582 if (error == 0) {
583 name = real;
584 } else if (error != ENOTSUP) {
585 goto out;
586 }
587 }
588
589 error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname);
590 if (!error)
591 error = zfs_secpolicy_destroy_perms(snapname, cr);
592 if (error)
593 goto out;
594
595 error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE);
596 if ((error == 0) || (error == ENOENT))
597 error = dmu_objset_destroy(snapname, B_FALSE);
598 out:
599 kmem_free(snapname, MAXNAMELEN);
600 kmem_free(real, MAXNAMELEN);
601
602 ZFS_EXIT(zsb);
603
604 return (error);
605 }
606
607 /*
608 * Creating a directory under '.zfs/snapshot' will automatically trigger
609 * the creation of a new snapshot with the given name.
610 */
611 /* ARGSUSED */
612 int
613 zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
614 struct inode **ipp, cred_t *cr, int flags)
615 {
616 zfs_sb_t *zsb = ITOZSB(dip);
617 char *dsname;
618 int error;
619
620 dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
621
622 if (snapshot_namecheck(dirname, NULL, NULL) != 0) {
623 error = EILSEQ;
624 goto out;
625 }
626
627 dmu_objset_name(zsb->z_os, dsname);
628
629 error = zfs_secpolicy_snapshot_perms(dsname, cr);
630 if (error)
631 goto out;
632
633 if (error == 0) {
634 error = dmu_objset_snapshot(dsname, dirname,
635 NULL, NULL, B_FALSE, B_FALSE, -1);
636 if (error)
637 goto out;
638
639 error = zfsctl_snapdir_lookup(dip, dirname, ipp,
640 0, cr, NULL, NULL);
641 }
642 out:
643 kmem_free(dsname, MAXNAMELEN);
644
645 return (error);
646 }
647
648 /*
649 * When a .zfs/snapshot/<snapshot> inode is evicted they must be removed
650 * from the snapshot list. This will normally happen as part of the auto
651 * unmount, however in the case of a manual snapshot unmount this will be
652 * the only notification we receive.
653 */
654 void
655 zfsctl_snapdir_inactive(struct inode *ip)
656 {
657 zfs_sb_t *zsb = ITOZSB(ip);
658 zfs_snapentry_t *sep, *next;
659
660 mutex_enter(&zsb->z_ctldir_lock);
661
662 sep = avl_first(&zsb->z_ctldir_snaps);
663 while (sep != NULL) {
664 next = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
665
666 if (sep->se_inode == ip) {
667 avl_remove(&zsb->z_ctldir_snaps, sep);
668 taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
669 zfsctl_sep_free(sep);
670 break;
671 }
672 sep = next;
673 }
674
675 mutex_exit(&zsb->z_ctldir_lock);
676 }
677
678 /*
679 * Attempt to unmount a snapshot by making a call to user space.
680 * There is no assurance that this can or will succeed, is just a
681 * best effort. In the case where it does fail, perhaps because
682 * it's in use, the unmount will fail harmlessly.
683 */
684 #define SET_UNMOUNT_CMD \
685 "exec 0</dev/null " \
686 " 1>/dev/null " \
687 " 2>/dev/null; " \
688 "umount -t zfs -n '%s%s'"
689
690 static int
691 __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags)
692 {
693 char *argv[] = { "/bin/sh", "-c", NULL, NULL };
694 char *envp[] = { NULL };
695 int error;
696
697 argv[2] = kmem_asprintf(SET_UNMOUNT_CMD,
698 flags & MNT_FORCE ? "-f " : "", sep->se_path);
699 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
700 strfree(argv[2]);
701
702 /*
703 * The umount system utility will return 256 on error. We must
704 * assume this error is because the file system is busy so it is
705 * converted to the more sensible EBUSY.
706 */
707 if (error)
708 error = EBUSY;
709
710 /*
711 * This was the result of a manual unmount, cancel the delayed work
712 * to prevent zfsctl_expire_snapshot() from attempting a unmount.
713 */
714 if ((error == 0) && !(flags & MNT_EXPIRE))
715 taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
716
717
718 return (error);
719 }
720
721 int
722 zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags)
723 {
724 zfs_snapentry_t search;
725 zfs_snapentry_t *sep;
726 int error = 0;
727
728 mutex_enter(&zsb->z_ctldir_lock);
729
730 search.se_name = name;
731 sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
732 if (sep) {
733 avl_remove(&zsb->z_ctldir_snaps, sep);
734 error = __zfsctl_unmount_snapshot(sep, flags);
735 if (error == EBUSY)
736 avl_add(&zsb->z_ctldir_snaps, sep);
737 else
738 zfsctl_sep_free(sep);
739 } else {
740 error = ENOENT;
741 }
742
743 mutex_exit(&zsb->z_ctldir_lock);
744 ASSERT3S(error, >=, 0);
745
746 return (error);
747 }
748
749 /*
750 * Traverse all mounted snapshots and attempt to unmount them. This
751 * is best effort, on failure EEXIST is returned and count will be set
752 * to the number of file snapshots which could not be unmounted.
753 */
754 int
755 zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count)
756 {
757 zfs_snapentry_t *sep, *next;
758 int error = 0;
759
760 *count = 0;
761
762 ASSERT(zsb->z_ctldir != NULL);
763 mutex_enter(&zsb->z_ctldir_lock);
764
765 sep = avl_first(&zsb->z_ctldir_snaps);
766 while (sep != NULL) {
767 next = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
768 avl_remove(&zsb->z_ctldir_snaps, sep);
769 error = __zfsctl_unmount_snapshot(sep, flags);
770 if (error == EBUSY) {
771 avl_add(&zsb->z_ctldir_snaps, sep);
772 (*count)++;
773 } else {
774 zfsctl_sep_free(sep);
775 }
776
777 sep = next;
778 }
779
780 mutex_exit(&zsb->z_ctldir_lock);
781
782 return ((*count > 0) ? EEXIST : 0);
783 }
784
785 #define SET_MOUNT_CMD \
786 "exec 0</dev/null " \
787 " 1>/dev/null " \
788 " 2>/dev/null; " \
789 "mount -t zfs -n '%s' '%s'"
790
791 int
792 zfsctl_mount_snapshot(struct path *path, int flags)
793 {
794 struct dentry *dentry = path->dentry;
795 struct inode *ip = dentry->d_inode;
796 zfs_sb_t *zsb = ITOZSB(ip);
797 char *full_name, *full_path;
798 zfs_snapentry_t *sep;
799 zfs_snapentry_t search;
800 char *argv[] = { "/bin/sh", "-c", NULL, NULL };
801 char *envp[] = { NULL };
802 int error;
803
804 ZFS_ENTER(zsb);
805
806 full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
807 full_path = kmem_zalloc(PATH_MAX, KM_SLEEP);
808
809 error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name);
810 if (error)
811 goto error;
812
813 error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path);
814 if (error)
815 goto error;
816
817 /*
818 * Attempt to mount the snapshot from user space. Normally this
819 * would be done using the vfs_kern_mount() function, however that
820 * function is marked GPL-only and cannot be used. On error we
821 * careful to log the real error to the console and return EISDIR
822 * to safely abort the automount. This should be very rare.
823 */
824 argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path);
825 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
826 strfree(argv[2]);
827 if (error) {
828 printk("ZFS: Unable to automount %s at %s: %d\n",
829 full_name, full_path, error);
830 error = EISDIR;
831 goto error;
832 }
833
834 mutex_enter(&zsb->z_ctldir_lock);
835
836 /*
837 * Ensure a previous entry does not exist, if it does safely remove
838 * it any cancel the outstanding expiration. This can occur when a
839 * snapshot is manually unmounted and then an automount is triggered.
840 */
841 search.se_name = full_name;
842 sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
843 if (sep) {
844 avl_remove(&zsb->z_ctldir_snaps, sep);
845 taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
846 zfsctl_sep_free(sep);
847 }
848
849 sep = zfsctl_sep_alloc();
850 sep->se_name = full_name;
851 sep->se_path = full_path;
852 sep->se_inode = ip;
853 avl_add(&zsb->z_ctldir_snaps, sep);
854
855 sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
856 zfsctl_expire_snapshot, sep, TQ_SLEEP,
857 ddi_get_lbolt() + zfs_expire_snapshot * HZ);
858
859 mutex_exit(&zsb->z_ctldir_lock);
860 error:
861 if (error) {
862 kmem_free(full_name, MAXNAMELEN);
863 kmem_free(full_path, PATH_MAX);
864 }
865
866 ZFS_EXIT(zsb);
867
868 return (error);
869 }
870
871 /*
872 * Check if this super block has a matching objset id.
873 */
874 static int
875 zfsctl_test_super(struct super_block *sb, void *objsetidp)
876 {
877 zfs_sb_t *zsb = sb->s_fs_info;
878 uint64_t objsetid = *(uint64_t *)objsetidp;
879
880 return (dmu_objset_id(zsb->z_os) == objsetid);
881 }
882
883 /*
884 * Prevent a new super block from being allocated if an existing one
885 * could not be located. We only want to preform a lookup operation.
886 */
887 static int
888 zfsctl_set_super(struct super_block *sb, void *objsetidp)
889 {
890 return (-EEXIST);
891 }
892
893 int
894 zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
895 {
896 zfs_sb_t *zsb = sb->s_fs_info;
897 struct super_block *sbp;
898 zfs_snapentry_t *sep;
899 uint64_t id;
900 int error;
901
902 ASSERT(zsb->z_ctldir != NULL);
903
904 mutex_enter(&zsb->z_ctldir_lock);
905
906 /*
907 * Verify that the snapshot is mounted.
908 */
909 sep = avl_first(&zsb->z_ctldir_snaps);
910 while (sep != NULL) {
911 error = dmu_snapshot_id(zsb->z_os, sep->se_name, &id);
912 if (error)
913 goto out;
914
915 if (id == objsetid)
916 break;
917
918 sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
919 }
920
921 if (sep != NULL) {
922 /*
923 * Lookup the mounted root rather than the covered mount
924 * point. This may fail if the snapshot has just been
925 * unmounted by an unrelated user space process. This
926 * race cannot occur to an expired mount point because
927 * we hold the zsb->z_ctldir_lock to prevent the race.
928 */
929 sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super,
930 zfsctl_set_super, 0, &id);
931 if (IS_ERR(sbp)) {
932 error = -PTR_ERR(sbp);
933 } else {
934 *zsbp = sbp->s_fs_info;
935 deactivate_super(sbp);
936 }
937 } else {
938 error = EINVAL;
939 }
940 out:
941 mutex_exit(&zsb->z_ctldir_lock);
942 ASSERT3S(error, >=, 0);
943
944 return (error);
945 }
946
947 /* ARGSUSED */
948 int
949 zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
950 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
951 {
952 zfs_sb_t *zsb = ITOZSB(dip);
953 struct inode *ip;
954 znode_t *dzp;
955 int error;
956
957 ZFS_ENTER(zsb);
958
959 if (zsb->z_shares_dir == 0) {
960 ZFS_EXIT(zsb);
961 return (ENOTSUP);
962 }
963
964 error = zfs_zget(zsb, zsb->z_shares_dir, &dzp);
965 if (error) {
966 ZFS_EXIT(zsb);
967 return (error);
968 }
969
970 error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL);
971
972 iput(ZTOI(dzp));
973 ZFS_EXIT(zsb);
974
975 return (error);
976 }
977
978
979 /*
980 * Initialize the various pieces we'll need to create and manipulate .zfs
981 * directories. Currently this is unused but available.
982 */
983 void
984 zfsctl_init(void)
985 {
986 zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri,
987 1, 8, TASKQ_PREPOPULATE);
988 }
989
990 /*
991 * Cleanup the various pieces we needed for .zfs directories. In particular
992 * ensure the expiry timer is canceled safely.
993 */
994 void
995 zfsctl_fini(void)
996 {
997 taskq_destroy(zfs_expire_taskq);
998 }
999
1000 module_param(zfs_expire_snapshot, int, 0644);
1001 MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");