module/zfs/zfs_ctldir.c
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 *
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
25 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
26 * LLNL-CODE-403049.
27 * Rewritten for Linux by:
28 * Rohan Puri <rohan.puri15@gmail.com>
29 * Brian Behlendorf <behlendorf1@llnl.gov>
30 */
31
32/*
33 * ZFS control directory (a.k.a. ".zfs")
34 *
35 * This directory provides a common location for all ZFS meta-objects.
36 * Currently, these are just the 'snapshot' and 'shares' directories, but this may
37 * expand in the future. The elements are built dynamically, as the hierarchy
38 * does not actually exist on disk.
39 *
40 * For 'snapshot', we don't want to have all snapshots always mounted, because
41 * this would take up a huge amount of space in /etc/mnttab. We have three
42 * types of objects:
43 *
44 * ctldir ------> snapshotdir -------> snapshot
45 *                                        |
46 *                                        |
47 *                                        V
48 *                                    mounted fs
49 *
50 * The 'snapshot' node contains just enough information to lookup '..' and act
51 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
52 * perform an automount of the underlying filesystem and return the
53 * corresponding inode.
54 *
55 * All mounts are handled automatically by a user mode helper which invokes
56 * the mount procedure. Unmounts are handled by allowing the mount
57 * point to expire so the kernel may automatically unmount it.
58 *
59 * The '.zfs', '.zfs/snapshot', and all directories created under
60 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
61 * zfs_sb_t as the head filesystem (what '.zfs' lives under).
62 *
63 * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
64 * (ie: snapshots) are complete ZFS filesystems and have their own unique
65 * zfs_sb_t. However, the fsid reported by these mounts will be the same
66 * as that used by the parent zfs_sb_t to make NFS happy.
67 */
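/*
 * For example, given a hypothetical filesystem 'tank/fish' mounted at
 * /tank/fish with a snapshot named 'snap1' (names are illustrative),
 * simply accessing the path:
 *
 *   ls /tank/fish/.zfs/snapshot/snap1
 *
 * triggers the automount described above; the user mode helper mounts
 * 'tank/fish@snap1' on that directory, and the mount is later allowed
 * to expire and is unmounted automatically.
 */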
68
69#include <sys/types.h>
70#include <sys/param.h>
71#include <sys/time.h>
72#include <sys/systm.h>
73#include <sys/sysmacros.h>
74#include <sys/pathname.h>
75#include <sys/vfs.h>
76#include <sys/vfs_opreg.h>
77#include <sys/zfs_ctldir.h>
78#include <sys/zfs_ioctl.h>
79#include <sys/zfs_vfsops.h>
80#include <sys/zfs_vnops.h>
81#include <sys/stat.h>
82#include <sys/dmu.h>
83#include <sys/dsl_deleg.h>
84#include <sys/mount.h>
85#include <sys/zpl.h>
86#include "zfs_namecheck.h"
87
88/*
89 * Control Directory Tunables (.zfs)
90 */
91int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
92
93/*
94 * Dedicated task queue for unmounting snapshots.
95 */
96static taskq_t *zfs_expire_taskq;
97
98static zfs_snapentry_t *
99zfsctl_sep_alloc(void)
100{
101 return kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP);
102}
103
104void
105zfsctl_sep_free(zfs_snapentry_t *sep)
106{
107 kmem_free(sep->se_name, MAXNAMELEN);
108 kmem_free(sep->se_path, PATH_MAX);
109 kmem_free(sep, sizeof (zfs_snapentry_t));
110}
111
112/*
113 * Attempt to expire an automounted snapshot; unmounts are attempted every
114 * 'zfs_expire_snapshot' seconds until they succeed. The work request is
115 * responsible for rescheduling itself and freeing the zfs_snapentry_t.
116 */
117static void
118zfsctl_expire_snapshot(void *data)
119{
120 zfs_snapentry_t *sep = (zfs_snapentry_t *)data;
121 zfs_sb_t *zsb = ITOZSB(sep->se_inode);
122 int error;
123
124 error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE);
125 if (error == EBUSY)
126 sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
127 zfsctl_expire_snapshot, sep, TQ_SLEEP,
128 ddi_get_lbolt() + zfs_expire_snapshot * HZ);
129}
130
131int
132snapentry_compare(const void *a, const void *b)
133{
134 const zfs_snapentry_t *sa = a;
135 const zfs_snapentry_t *sb = b;
136 int ret = strcmp(sa->se_name, sb->se_name);
137
138 if (ret < 0)
139 return (-1);
140 else if (ret > 0)
141 return (1);
142 else
143 return (0);
144}
145
146boolean_t
147zfsctl_is_node(struct inode *ip)
148{
149 return (ITOZ(ip)->z_is_ctldir);
150}
151
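/*
 * Control directory inodes use reserved object ids: the '.zfs' root,
 * snapdir, and shares directories each have a fixed id, while snapshot
 * mount point inodes are assigned ids counting down from
 * ZFSCTL_INO_SNAPDIRS (see zfsctl_snapdir_lookup() below). Any ctldir
 * inode at or below ZFSCTL_INO_SNAPDIRS is therefore a snapshot entry.
 */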
152boolean_t
153zfsctl_is_snapdir(struct inode *ip)
154{
155 return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS));
156}
157
158/*
159 * Allocate a new inode with the passed id and ops.
160 */
161static struct inode *
162zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
163 const struct file_operations *fops, const struct inode_operations *ops)
164{
165 struct timespec now = current_fs_time(zsb->z_sb);
166 struct inode *ip;
167 znode_t *zp;
168
169 ip = new_inode(zsb->z_sb);
170 if (ip == NULL)
171 return (NULL);
172
173 zp = ITOZ(ip);
174 ASSERT3P(zp->z_dirlocks, ==, NULL);
175 ASSERT3P(zp->z_acl_cached, ==, NULL);
176 ASSERT3P(zp->z_xattr_cached, ==, NULL);
177 zp->z_id = id;
178 zp->z_unlinked = 0;
179 zp->z_atime_dirty = 0;
180 zp->z_zn_prefetch = 0;
181 zp->z_moved = 0;
182 zp->z_sa_hdl = NULL;
183 zp->z_blksz = 0;
184 zp->z_seq = 0;
185 zp->z_mapcnt = 0;
186 zp->z_gen = 0;
187 zp->z_size = 0;
188 zp->z_atime[0] = 0;
189 zp->z_atime[1] = 0;
190 zp->z_links = 0;
191 zp->z_pflags = 0;
192 zp->z_uid = 0;
193 zp->z_gid = 0;
194 zp->z_mode = 0;
195 zp->z_sync_cnt = 0;
196 zp->z_is_zvol = B_FALSE;
197 zp->z_is_mapped = B_FALSE;
198 zp->z_is_ctldir = B_TRUE;
199 zp->z_is_sa = B_FALSE;
200 zp->z_is_stale = B_FALSE;
201 ip->i_ino = id;
202 ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
203 ip->i_uid = 0;
204 ip->i_gid = 0;
205 ip->i_blkbits = SPA_MINBLOCKSHIFT;
206 ip->i_atime = now;
207 ip->i_mtime = now;
208 ip->i_ctime = now;
209 ip->i_fop = fops;
210 ip->i_op = ops;
211
212 if (insert_inode_locked(ip)) {
213 unlock_new_inode(ip);
214 iput(ip);
215 return (NULL);
216 }
217
218 mutex_enter(&zsb->z_znodes_lock);
219 list_insert_tail(&zsb->z_all_znodes, zp);
220 zsb->z_nr_znodes++;
221 membar_producer();
222 mutex_exit(&zsb->z_znodes_lock);
223
224 unlock_new_inode(ip);
225
226 return (ip);
227}
228
229/*
230 * Lookup the inode with given id, it will be allocated if needed.
231 */
232static struct inode *
233zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id,
234 const struct file_operations *fops, const struct inode_operations *ops)
235{
236 struct inode *ip = NULL;
237
238 while (ip == NULL) {
239 ip = ilookup(zsb->z_sb, (unsigned long)id);
240 if (ip)
241 break;
242
243 /* May fail due to concurrent zfsctl_inode_alloc() */
244 ip = zfsctl_inode_alloc(zsb, id, fops, ops);
245 }
246
247 return (ip);
248}
249
250/*
251 * Free zfsctl inode specific structures, currently there are none.
252 */
253void
254zfsctl_inode_destroy(struct inode *ip)
255{
256 return;
257}
258
259/*
260 * An inode is being evicted from the cache.
261 */
262void
263zfsctl_inode_inactive(struct inode *ip)
264{
265 if (zfsctl_is_snapdir(ip))
266 zfsctl_snapdir_inactive(ip);
267}
268
269/*
270 * Create the '.zfs' directory. This directory is cached as part of the VFS
271 * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount()
272 * therefore checks against a vfs_count of 2 instead of 1. This reference
273 * is removed when the ctldir is destroyed in the unmount. All other entities
274 * under the '.zfs' directory are created dynamically as needed.
275 *
276 * Because the dynamically created '.zfs' directory entries assume the use
277 * of 64-bit inode numbers, this support must be disabled on 32-bit systems.
278 */
279int
280zfsctl_create(zfs_sb_t *zsb)
281{
282#if defined(CONFIG_64BIT)
283 ASSERT(zsb->z_ctldir == NULL);
284
285 zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT,
286 &zpl_fops_root, &zpl_ops_root);
287 if (zsb->z_ctldir == NULL)
288 return (ENOENT);
289
290 return (0);
291#else
292 return (EOPNOTSUPP);
293#endif /* CONFIG_64BIT */
294}
295
296/*
297 * Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
298 */
299void
300zfsctl_destroy(zfs_sb_t *zsb)
301{
302 iput(zsb->z_ctldir);
303 zsb->z_ctldir = NULL;
304}
305
306/*
307 * Given a root znode, retrieve the associated .zfs directory.
308 * Add a hold to the vnode and return it.
309 */
310struct inode *
311zfsctl_root(znode_t *zp)
312{
313 ASSERT(zfs_has_ctldir(zp));
314 igrab(ZTOZSB(zp)->z_ctldir);
315 return (ZTOZSB(zp)->z_ctldir);
316}
317
318/*ARGSUSED*/
319int
320zfsctl_fid(struct inode *ip, fid_t *fidp)
321{
322 znode_t *zp = ITOZ(ip);
323 zfs_sb_t *zsb = ITOZSB(ip);
324 uint64_t object = zp->z_id;
325 zfid_short_t *zfid;
326 int i;
327
328 ZFS_ENTER(zsb);
329
330 if (fidp->fid_len < SHORT_FID_LEN) {
331 fidp->fid_len = SHORT_FID_LEN;
332 ZFS_EXIT(zsb);
333 return (ENOSPC);
334 }
335
336 zfid = (zfid_short_t *)fidp;
337
338 zfid->zf_len = SHORT_FID_LEN;
339
340 for (i = 0; i < sizeof (zfid->zf_object); i++)
341 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
342
343 /* .zfs znodes always have a generation number of 0 */
344 for (i = 0; i < sizeof (zfid->zf_gen); i++)
345 zfid->zf_gen[i] = 0;
346
347 ZFS_EXIT(zsb);
348 return (0);
349}
350
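/*
 * Build the full dataset@snapshot name for 'name'. For example, a
 * hypothetical snapshot 'snap1' of the objset backing 'tank/fish'
 * yields the zname 'tank/fish@snap1'.
 */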
351static int
352zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname)
353{
354 objset_t *os = ITOZSB(ip)->z_os;
355
356 if (snapshot_namecheck(name, NULL, NULL) != 0)
357 return (EILSEQ);
358
359 dmu_objset_name(os, zname);
360 if ((strlen(zname) + 1 + strlen(name)) >= len)
361 return (ENAMETOOLONG);
362
363 (void) strcat(zname, "@");
364 (void) strcat(zname, name);
365
366 return (0);
367}
368
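/*
 * Resolve the full path of the covered mount point using d_path(), for
 * example a hypothetical '/tank/fish/.zfs/snapshot/snap1'. This is the
 * path later handed to the user mode mount and umount helpers.
 */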
369static int
370zfsctl_snapshot_zpath(struct path *path, int len, char *zpath)
371{
372 char *path_buffer, *path_ptr;
373 int path_len, error = 0;
374
375 path_buffer = kmem_alloc(len, KM_SLEEP);
376
377 path_ptr = d_path(path, path_buffer, len);
378 if (IS_ERR(path_ptr)) {
379 error = -PTR_ERR(path_ptr);
380 goto out;
381 }
382
383 path_len = path_buffer + len - 1 - path_ptr;
384 if (path_len > len) {
385 error = EFAULT;
386 goto out;
387 }
388
389 memcpy(zpath, path_ptr, path_len);
390 zpath[path_len] = '\0';
391out:
392 kmem_free(path_buffer, len);
393
394 return (error);
395}
396
397/*
398 * Special case the handling of "..".
399 */
400/* ARGSUSED */
401int
402zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp,
403 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
404{
405 zfs_sb_t *zsb = ITOZSB(dip);
406 int error = 0;
407
408 ZFS_ENTER(zsb);
409
410 if (strcmp(name, "..") == 0) {
411 *ipp = dip->i_sb->s_root->d_inode;
412 } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) {
413 *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR,
414 &zpl_fops_snapdir, &zpl_ops_snapdir);
415 } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) {
416 *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES,
417 &zpl_fops_shares, &zpl_ops_shares);
418 } else {
419 *ipp = NULL;
420 }
421
422 if (*ipp == NULL)
423 error = ENOENT;
424
425 ZFS_EXIT(zsb);
426
427 return (error);
428}
429
430/*
431 * Lookup entry point for the 'snapshot' directory. Try to open the
432 * snapshot if it exists, creating the pseudo filesystem inode as necessary.
433 * Perform a mount of the associated dataset on top of the inode.
434 */
435/* ARGSUSED */
436int
437zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp,
438 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
439{
440 zfs_sb_t *zsb = ITOZSB(dip);
441 uint64_t id;
442 int error;
443
444 ZFS_ENTER(zsb);
445
446 error = dmu_snapshot_lookup(zsb->z_os, name, &id);
447 if (error) {
448 ZFS_EXIT(zsb);
449 return (error);
450 }
451
452 *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id,
453 &simple_dir_operations, &simple_dir_inode_operations);
454 if (*ipp) {
455#ifdef HAVE_AUTOMOUNT
456 (*ipp)->i_flags |= S_AUTOMOUNT;
457#endif /* HAVE_AUTOMOUNT */
458 } else {
459 error = ENOENT;
460 }
461
462 ZFS_EXIT(zsb);
463
464 return (error);
465}
466
467static void
468zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name)
469{
470 avl_index_t where;
471
472 ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock));
473 ASSERT(sep != NULL);
474
475 /*
476 * Change the name in the AVL tree.
477 */
478 avl_remove(&zsb->z_ctldir_snaps, sep);
479 (void) strcpy(sep->se_name, name);
480 VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL);
481 avl_insert(&zsb->z_ctldir_snaps, sep, where);
482}
483
484/*
485 * Renaming a directory under '.zfs/snapshot' will automatically trigger
486 * a rename of the snapshot to the new name. The rename is confined to
487 * the '.zfs/snapshot' directory; snapshots cannot be moved elsewhere.
488 */
489/*ARGSUSED*/
490int
491zfsctl_snapdir_rename(struct inode *sdip, char *sname,
492 struct inode *tdip, char *tname, cred_t *cr, int flags)
493{
494 zfs_sb_t *zsb = ITOZSB(sdip);
495 zfs_snapentry_t search, *sep;
496 avl_index_t where;
497 char *to, *from, *real;
498 int error;
499
500 ZFS_ENTER(zsb);
501
502 to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
503 from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
504 real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
505
506 if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
507 error = dmu_snapshot_realname(zsb->z_os, sname, real,
508 MAXNAMELEN, NULL);
509 if (error == 0) {
510 sname = real;
511 } else if (error != ENOTSUP) {
512 goto out;
513 }
514 }
515
516 error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from);
517 if (!error)
518 error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to);
519 if (!error)
520 error = zfs_secpolicy_rename_perms(from, to, cr);
521 if (error)
522 goto out;
523
524 /*
525 * Cannot move snapshots out of the snapdir.
526 */
527 if (sdip != tdip) {
528 error = EINVAL;
529 goto out;
530 }
531
532 /*
533 * No-op when names are identical.
534 */
535 if (strcmp(sname, tname) == 0) {
536 error = 0;
537 goto out;
538 }
539
540 mutex_enter(&zsb->z_ctldir_lock);
541
542 error = dmu_objset_rename(from, to, B_FALSE);
543 if (error)
544 goto out_unlock;
545
546 search.se_name = (char *)sname;
547 sep = avl_find(&zsb->z_ctldir_snaps, &search, &where);
548 if (sep)
549 zfsctl_rename_snap(zsb, sep, tname);
550
551out_unlock:
552 mutex_exit(&zsb->z_ctldir_lock);
553out:
554 kmem_free(from, MAXNAMELEN);
555 kmem_free(to, MAXNAMELEN);
556 kmem_free(real, MAXNAMELEN);
557
558 ZFS_EXIT(zsb);
559
560 return (error);
561}
562
563/*
564 * Removing a directory under '.zfs/snapshot' will automatically trigger
565 * the removal of the snapshot with the given name.
566 */
567/* ARGSUSED */
568int
569zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
570{
571 zfs_sb_t *zsb = ITOZSB(dip);
572 char *snapname, *real;
573 int error;
574
575 ZFS_ENTER(zsb);
576
577 snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
578 real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
579
580 if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
581 error = dmu_snapshot_realname(zsb->z_os, name, real,
582 MAXNAMELEN, NULL);
583 if (error == 0) {
584 name = real;
585 } else if (error != ENOTSUP) {
586 goto out;
587 }
588 }
589
590 error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname);
591 if (!error)
592 error = zfs_secpolicy_destroy_perms(snapname, cr);
593 if (error)
594 goto out;
595
596 error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE);
597 if ((error == 0) || (error == ENOENT))
598 error = dmu_objset_destroy(snapname, B_FALSE);
599out:
600 kmem_free(snapname, MAXNAMELEN);
601 kmem_free(real, MAXNAMELEN);
602
603 ZFS_EXIT(zsb);
604
605 return (error);
606}
607
608/*
609 * Creating a directory under '.zfs/snapshot' will automatically trigger
610 * the creation of a new snapshot with the given name.
611 */
612/* ARGSUSED */
613int
614zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
615 struct inode **ipp, cred_t *cr, int flags)
616{
617 zfs_sb_t *zsb = ITOZSB(dip);
618 char *dsname;
619 int error;
620
621 dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
622
623 if (snapshot_namecheck(dirname, NULL, NULL) != 0) {
624 error = EILSEQ;
625 goto out;
626 }
627
628 dmu_objset_name(zsb->z_os, dsname);
629
630 error = zfs_secpolicy_snapshot_perms(dsname, cr);
631 if (error)
632 goto out;
633
634 if (error == 0) {
635 error = dmu_objset_snapshot(dsname, dirname,
636 NULL, NULL, B_FALSE, B_FALSE, -1);
637 if (error)
638 goto out;
639
640 error = zfsctl_snapdir_lookup(dip, dirname, ipp,
641 0, cr, NULL, NULL);
642 }
643out:
644 kmem_free(dsname, MAXNAMELEN);
645
646 return (error);
647}
648
649/*
650 * When a .zfs/snapshot/<snapshot> inode is evicted it must be removed
651 * from the snapshot list. This will normally happen as part of the auto
652 * unmount; however, in the case of a manual snapshot unmount this will be
653 * the only notification we receive.
654 */
655void
656zfsctl_snapdir_inactive(struct inode *ip)
657{
658 zfs_sb_t *zsb = ITOZSB(ip);
659 zfs_snapentry_t *sep, *next;
660
661 mutex_enter(&zsb->z_ctldir_lock);
662
663 sep = avl_first(&zsb->z_ctldir_snaps);
664 while (sep != NULL) {
665 next = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
666
667 if (sep->se_inode == ip) {
668 avl_remove(&zsb->z_ctldir_snaps, sep);
669 taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
670 zfsctl_sep_free(sep);
671 break;
672 }
673 sep = next;
674 }
675
676 mutex_exit(&zsb->z_ctldir_lock);
677}
678
679/*
680 * Attempt to unmount a snapshot by making a call to user space.
681 * There is no assurance that this can or will succeed; it is just a
682 * best effort. In the case where it does fail, perhaps because
683 * it's in use, the unmount will fail harmlessly.
684 */
685#define SET_UNMOUNT_CMD \
686 "exec 0</dev/null " \
687 " 1>/dev/null " \
688 " 2>/dev/null; " \
689 "umount -t zfs -n %s'%s'"
690
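/*
 * With hypothetical values, and MNT_FORCE set, the command above
 * expands to roughly:
 *
 *   umount -t zfs -n -f '/tank/fish/.zfs/snapshot/snap1'
 *
 * The -n option keeps the helper from updating /etc/mtab for the
 * expiring mount.
 */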
691static int
692__zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags)
693{
694 char *argv[] = { "/bin/sh", "-c", NULL, NULL };
695 char *envp[] = { NULL };
696 int error;
697
698 argv[2] = kmem_asprintf(SET_UNMOUNT_CMD,
699 flags & MNT_FORCE ? "-f " : "", sep->se_path);
700 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
701 strfree(argv[2]);
702
703 /*
704 * The umount system utility will return 256 on error. We must
705 * assume this error is because the file system is busy so it is
706 * converted to the more sensible EBUSY.
707 */
708 if (error)
709 error = EBUSY;
710
711 /*
712 * If this was the result of a manual unmount, cancel the delayed work
713 * to prevent zfsctl_expire_snapshot() from attempting an unmount.
714 */
715 if ((error == 0) && !(flags & MNT_EXPIRE))
716 taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
717
718
719 return (error);
720}
721
722int
723zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags)
724{
725 zfs_snapentry_t search;
726 zfs_snapentry_t *sep;
727 int error = 0;
728
729 mutex_enter(&zsb->z_ctldir_lock);
730
731 search.se_name = name;
732 sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
733 if (sep) {
734 avl_remove(&zsb->z_ctldir_snaps, sep);
735 mutex_exit(&zsb->z_ctldir_lock);
736
737 error = __zfsctl_unmount_snapshot(sep, flags);
738
739 mutex_enter(&zsb->z_ctldir_lock);
740 if (error == EBUSY)
741 avl_add(&zsb->z_ctldir_snaps, sep);
742 else
743 zfsctl_sep_free(sep);
744 } else {
745 error = ENOENT;
746 }
747
748 mutex_exit(&zsb->z_ctldir_lock);
749 ASSERT3S(error, >=, 0);
750
751 return (error);
752}
753
754/*
755 * Traverse all mounted snapshots and attempt to unmount them. This
756 * is best effort; on failure EEXIST is returned and count will be set
757 * to the number of snapshots which could not be unmounted.
758 */
759int
760zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count)
761{
762 zfs_snapentry_t *sep, *next;
763 int error = 0;
764
765 *count = 0;
766
767 ASSERT(zsb->z_ctldir != NULL);
768 mutex_enter(&zsb->z_ctldir_lock);
769
770 sep = avl_first(&zsb->z_ctldir_snaps);
771 while (sep != NULL) {
772 next = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
773 avl_remove(&zsb->z_ctldir_snaps, sep);
774 mutex_exit(&zsb->z_ctldir_lock);
775
776 error = __zfsctl_unmount_snapshot(sep, flags);
777
778 mutex_enter(&zsb->z_ctldir_lock);
779 if (error == EBUSY) {
780 avl_add(&zsb->z_ctldir_snaps, sep);
781 (*count)++;
782 } else {
783 zfsctl_sep_free(sep);
784 }
785
786 sep = next;
787 }
788
789 mutex_exit(&zsb->z_ctldir_lock);
790
791 return ((*count > 0) ? EEXIST : 0);
792}
793
794#define SET_MOUNT_CMD \
795 "exec 0</dev/null " \
796 " 1>/dev/null " \
797 " 2>/dev/null; " \
798 "mount -t zfs -n '%s' '%s'"
799
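/*
 * With hypothetical values the command above expands to roughly:
 *
 *   mount -t zfs -n 'tank/fish@snap1' '/tank/fish/.zfs/snapshot/snap1'
 *
 * where the snapshot dataset name and mount point come from
 * zfsctl_snapshot_zname() and zfsctl_snapshot_zpath() respectively.
 */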
800int
801zfsctl_mount_snapshot(struct path *path, int flags)
802{
803 struct dentry *dentry = path->dentry;
804 struct inode *ip = dentry->d_inode;
805 zfs_sb_t *zsb = ITOZSB(ip);
806 char *full_name, *full_path;
807 zfs_snapentry_t *sep;
808 zfs_snapentry_t search;
809 char *argv[] = { "/bin/sh", "-c", NULL, NULL };
810 char *envp[] = { NULL };
811 int error;
812
813 ZFS_ENTER(zsb);
814
815 full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
816 full_path = kmem_zalloc(PATH_MAX, KM_SLEEP);
817
818 error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name);
819 if (error)
820 goto error;
821
822 error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path);
823 if (error)
824 goto error;
825
826 /*
827 * Attempt to mount the snapshot from user space. Normally this
828 * would be done using the vfs_kern_mount() function, however that
829 * function is marked GPL-only and cannot be used. On error we are
830 * careful to log the real error to the console and return EISDIR
831 * to safely abort the automount. This should be very rare.
832 */
833 argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path);
834 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
835 strfree(argv[2]);
836 if (error) {
837 printk("ZFS: Unable to automount %s at %s: %d\n",
838 full_name, full_path, error);
839 error = EISDIR;
840 goto error;
841 }
842
843 mutex_enter(&zsb->z_ctldir_lock);
844
845 /*
846 * Ensure a previous entry does not exist; if it does, safely remove
847 * it and cancel the outstanding expiration. This can occur when a
848 * snapshot is manually unmounted and then an automount is triggered.
849 */
850 search.se_name = full_name;
851 sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL);
852 if (sep) {
853 avl_remove(&zsb->z_ctldir_snaps, sep);
854 taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid);
855 zfsctl_sep_free(sep);
856 }
857
858 sep = zfsctl_sep_alloc();
859 sep->se_name = full_name;
860 sep->se_path = full_path;
861 sep->se_inode = ip;
862 avl_add(&zsb->z_ctldir_snaps, sep);
863
864 sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
865 zfsctl_expire_snapshot, sep, TQ_SLEEP,
866 ddi_get_lbolt() + zfs_expire_snapshot * HZ);
867
868 mutex_exit(&zsb->z_ctldir_lock);
869error:
870 if (error) {
871 kmem_free(full_name, MAXNAMELEN);
872 kmem_free(full_path, PATH_MAX);
873 }
874
875 ZFS_EXIT(zsb);
876
877 return (error);
878}
879
880/*
881 * Check if this super block has a matching objset id.
882 */
883static int
884zfsctl_test_super(struct super_block *sb, void *objsetidp)
885{
886 zfs_sb_t *zsb = sb->s_fs_info;
887 uint64_t objsetid = *(uint64_t *)objsetidp;
888
889 return (dmu_objset_id(zsb->z_os) == objsetid);
890}
891
892/*
893 * Prevent a new super block from being allocated if an existing one
894 * could not be located. We only want to perform a lookup operation.
895 */
896static int
897zfsctl_set_super(struct super_block *sb, void *objsetidp)
898{
899 return (-EEXIST);
900}
901
902int
903zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
904{
905 zfs_sb_t *zsb = sb->s_fs_info;
906 struct super_block *sbp;
907 zfs_snapentry_t *sep;
908 uint64_t id;
909 int error;
910
911 ASSERT(zsb->z_ctldir != NULL);
912
913 mutex_enter(&zsb->z_ctldir_lock);
914
915 /*
916 * Verify that the snapshot is mounted.
917 */
918 sep = avl_first(&zsb->z_ctldir_snaps);
919 while (sep != NULL) {
920 error = dmu_snapshot_lookup(zsb->z_os, sep->se_name, &id);
921 if (error)
922 goto out;
923
924 if (id == objsetid)
925 break;
926
927 sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep);
928 }
929
930 if (sep != NULL) {
931 /*
932 * Lookup the mounted root rather than the covered mount
933 * point. This may fail if the snapshot has just been
934 * unmounted by an unrelated user space process. This
935 * race cannot occur for an expired mount point because we
936 * hold the zsb->z_ctldir_lock.
937 */
938 sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super,
939 zfsctl_set_super, 0, &id);
940 if (IS_ERR(sbp)) {
941 error = -PTR_ERR(sbp);
942 } else {
943 *zsbp = sbp->s_fs_info;
944 deactivate_super(sbp);
945 }
946 } else {
947 error = EINVAL;
948 }
949out:
950 mutex_exit(&zsb->z_ctldir_lock);
951 ASSERT3S(error, >=, 0);
952
953 return (error);
954}
955
956/* ARGSUSED */
957int
958zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
959 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
960{
961 zfs_sb_t *zsb = ITOZSB(dip);
962 struct inode *ip;
963 znode_t *dzp;
964 int error;
965
966 ZFS_ENTER(zsb);
967
968 if (zsb->z_shares_dir == 0) {
969 ZFS_EXIT(zsb);
970 return (ENOTSUP);
971 }
972
973 error = zfs_zget(zsb, zsb->z_shares_dir, &dzp);
974 if (error) {
975 ZFS_EXIT(zsb);
976 return (error);
977 }
978
979 error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL);
980
981 iput(ZTOI(dzp));
982 ZFS_EXIT(zsb);
983
984 return (error);
985}
986
987
988/*
989 * Initialize the various pieces we'll need to create and manipulate .zfs
990 * directories. Currently this only creates the snapshot unmount task queue.
991 */
992void
993zfsctl_init(void)
994{
995 zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri,
996 1, 8, TASKQ_PREPOPULATE);
997}
998
999/*
1000 * Clean up the various pieces we needed for .zfs directories. In particular,
1001 * ensure the snapshot expiry task queue is destroyed safely.
1002 */
1003void
1004zfsctl_fini(void)
1005{
1006 taskq_destroy(zfs_expire_taskq);
1007}
1008
1009module_param(zfs_expire_snapshot, int, 0644);
1010MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");
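/*
 * The expiration time may be tuned at module load time, for example
 * with a hypothetical ten minute timeout:
 *
 *   modprobe zfs zfs_expire_snapshot=600
 *
 * or adjusted at runtime via /sys/module/zfs/parameters/zfs_expire_snapshot,
 * since the parameter is registered with mode 0644.
 */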