]>
Commit | Line | Data |
---|---|---|
ebe7e575 BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * | |
23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Copyright (C) 2011 Lawrence Livermore National Security, LLC. | |
25 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
26 | * LLNL-CODE-403049. | |
27 | * Rewritten for Linux by: | |
28 | * Rohan Puri <rohan.puri15@gmail.com> | |
29 | * Brian Behlendorf <behlendorf1@llnl.gov> | |
2e528b49 | 30 | * Copyright (c) 2013 by Delphix. All rights reserved. |
ebe7e575 BB |
31 | */ |
32 | ||
33 | /* | |
34 | * ZFS control directory (a.k.a. ".zfs") | |
35 | * | |
36 | * This directory provides a common location for all ZFS meta-objects. | |
37 | * Currently, this is only the 'snapshot' and 'shares' directory, but this may | |
38 | * expand in the future. The elements are built dynamically, as the hierarchy | |
39 | * does not actually exist on disk. | |
40 | * | |
41 | * For 'snapshot', we don't want to have all snapshots always mounted, because | |
42 | * this would take up a huge amount of space in /etc/mnttab. We have three | |
43 | * types of objects: | |
44 | * | |
45 | * ctldir ------> snapshotdir -------> snapshot | |
46 | * | | |
47 | * | | |
48 | * V | |
49 | * mounted fs | |
50 | * | |
51 | * The 'snapshot' node contains just enough information to lookup '..' and act | |
52 | * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we | |
53 | * perform an automount of the underlying filesystem and return the | |
54 | * corresponding inode. | |
55 | * | |
56 | * All mounts are handled automatically by a user mode helper which invokes | |
57 | * the mount procedure. Unmounts are handled by allowing the mount | |
58 | * point to expire so the kernel may automatically unmount it. | |
59 | * | |
60 | * The '.zfs', '.zfs/snapshot', and all directories created under | |
61 | * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same | |
62 | * zfs_sb_t as the head filesystem (what '.zfs' lives under). | |
63 | * | |
64 | * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths | |
65 | * (ie: snapshots) are complete ZFS filesystems and have their own unique | |
66 | * zfs_sb_t. However, the fsid reported by these mounts will be the same | |
67 | * as that used by the parent zfs_sb_t to make NFS happy. | |
68 | */ | |
69 | ||
70 | #include <sys/types.h> | |
71 | #include <sys/param.h> | |
72 | #include <sys/time.h> | |
73 | #include <sys/systm.h> | |
74 | #include <sys/sysmacros.h> | |
75 | #include <sys/pathname.h> | |
76 | #include <sys/vfs.h> | |
77 | #include <sys/vfs_opreg.h> | |
78 | #include <sys/zfs_ctldir.h> | |
79 | #include <sys/zfs_ioctl.h> | |
80 | #include <sys/zfs_vfsops.h> | |
81 | #include <sys/zfs_vnops.h> | |
82 | #include <sys/stat.h> | |
83 | #include <sys/dmu.h> | |
13fe0198 | 84 | #include <sys/dsl_destroy.h> |
ebe7e575 BB |
85 | #include <sys/dsl_deleg.h> |
86 | #include <sys/mount.h> | |
87 | #include <sys/zpl.h> | |
88 | #include "zfs_namecheck.h" | |
89 | ||
/*
 * Control Directory Tunables (.zfs)
 *
 * zfs_expire_snapshot is the delay, in seconds, used in this file when
 * scheduling an attempt to unmount an automounted snapshot.  It is
 * exported as a module parameter at the bottom of the file.
 */
int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;

/*
 * Dedicated task queue for unmounting snapshots.  Created in zfsctl_init()
 * and destroyed in zfsctl_fini().
 */
static taskq_t *zfs_expire_taskq;
99 | ||
ebe7e575 BB |
100 | static zfs_snapentry_t * |
101 | zfsctl_sep_alloc(void) | |
102 | { | |
d1d7e268 | 103 | return (kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP)); |
ebe7e575 BB |
104 | } |
105 | ||
106 | void | |
107 | zfsctl_sep_free(zfs_snapentry_t *sep) | |
108 | { | |
109 | kmem_free(sep->se_name, MAXNAMELEN); | |
110 | kmem_free(sep->se_path, PATH_MAX); | |
111 | kmem_free(sep, sizeof (zfs_snapentry_t)); | |
112 | } | |
113 | ||
114 | /* | |
115 | * Attempt to expire an automounted snapshot, unmounts are attempted every | |
116 | * 'zfs_expire_snapshot' seconds until they succeed. The work request is | |
117 | * responsible for rescheduling itself and freeing the zfs_expire_snapshot_t. | |
118 | */ | |
119 | static void | |
120 | zfsctl_expire_snapshot(void *data) | |
121 | { | |
2ae10319 BB |
122 | zfs_snapentry_t *sep = (zfs_snapentry_t *)data; |
123 | zfs_sb_t *zsb = ITOZSB(sep->se_inode); | |
ebe7e575 BB |
124 | int error; |
125 | ||
ebe7e575 BB |
126 | error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE); |
127 | if (error == EBUSY) | |
2ae10319 BB |
128 | sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq, |
129 | zfsctl_expire_snapshot, sep, TQ_SLEEP, | |
130 | ddi_get_lbolt() + zfs_expire_snapshot * HZ); | |
ebe7e575 BB |
131 | } |
132 | ||
133 | int | |
134 | snapentry_compare(const void *a, const void *b) | |
135 | { | |
136 | const zfs_snapentry_t *sa = a; | |
137 | const zfs_snapentry_t *sb = b; | |
138 | int ret = strcmp(sa->se_name, sb->se_name); | |
139 | ||
140 | if (ret < 0) | |
141 | return (-1); | |
142 | else if (ret > 0) | |
143 | return (1); | |
144 | else | |
145 | return (0); | |
146 | } | |
147 | ||
148 | boolean_t | |
149 | zfsctl_is_node(struct inode *ip) | |
150 | { | |
151 | return (ITOZ(ip)->z_is_ctldir); | |
152 | } | |
153 | ||
154 | boolean_t | |
155 | zfsctl_is_snapdir(struct inode *ip) | |
156 | { | |
157 | return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); | |
158 | } | |
159 | ||
160 | /* | |
161 | * Allocate a new inode with the passed id and ops. | |
162 | */ | |
163 | static struct inode * | |
164 | zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id, | |
165 | const struct file_operations *fops, const struct inode_operations *ops) | |
166 | { | |
167 | struct timespec now = current_fs_time(zsb->z_sb); | |
168 | struct inode *ip; | |
169 | znode_t *zp; | |
170 | ||
171 | ip = new_inode(zsb->z_sb); | |
172 | if (ip == NULL) | |
173 | return (NULL); | |
174 | ||
175 | zp = ITOZ(ip); | |
176 | ASSERT3P(zp->z_dirlocks, ==, NULL); | |
177 | ASSERT3P(zp->z_acl_cached, ==, NULL); | |
178 | ASSERT3P(zp->z_xattr_cached, ==, NULL); | |
179 | zp->z_id = id; | |
180 | zp->z_unlinked = 0; | |
181 | zp->z_atime_dirty = 0; | |
182 | zp->z_zn_prefetch = 0; | |
183 | zp->z_moved = 0; | |
184 | zp->z_sa_hdl = NULL; | |
185 | zp->z_blksz = 0; | |
186 | zp->z_seq = 0; | |
187 | zp->z_mapcnt = 0; | |
188 | zp->z_gen = 0; | |
189 | zp->z_size = 0; | |
190 | zp->z_atime[0] = 0; | |
191 | zp->z_atime[1] = 0; | |
192 | zp->z_links = 0; | |
193 | zp->z_pflags = 0; | |
194 | zp->z_uid = 0; | |
195 | zp->z_gid = 0; | |
196 | zp->z_mode = 0; | |
197 | zp->z_sync_cnt = 0; | |
198 | zp->z_is_zvol = B_FALSE; | |
199 | zp->z_is_mapped = B_FALSE; | |
200 | zp->z_is_ctldir = B_TRUE; | |
201 | zp->z_is_sa = B_FALSE; | |
7b3e34ba | 202 | zp->z_is_stale = B_FALSE; |
ebe7e575 BB |
203 | ip->i_ino = id; |
204 | ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO); | |
570d6edf RY |
205 | ip->i_uid = SUID_TO_KUID(0); |
206 | ip->i_gid = SGID_TO_KGID(0); | |
ebe7e575 BB |
207 | ip->i_blkbits = SPA_MINBLOCKSHIFT; |
208 | ip->i_atime = now; | |
209 | ip->i_mtime = now; | |
210 | ip->i_ctime = now; | |
211 | ip->i_fop = fops; | |
212 | ip->i_op = ops; | |
213 | ||
214 | if (insert_inode_locked(ip)) { | |
215 | unlock_new_inode(ip); | |
216 | iput(ip); | |
217 | return (NULL); | |
218 | } | |
219 | ||
220 | mutex_enter(&zsb->z_znodes_lock); | |
221 | list_insert_tail(&zsb->z_all_znodes, zp); | |
9ed86e7c | 222 | zsb->z_nr_znodes++; |
ebe7e575 BB |
223 | membar_producer(); |
224 | mutex_exit(&zsb->z_znodes_lock); | |
225 | ||
226 | unlock_new_inode(ip); | |
227 | ||
228 | return (ip); | |
229 | } | |
230 | ||
231 | /* | |
232 | * Lookup the inode with given id, it will be allocated if needed. | |
233 | */ | |
234 | static struct inode * | |
fc173c85 | 235 | zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id, |
ebe7e575 BB |
236 | const struct file_operations *fops, const struct inode_operations *ops) |
237 | { | |
238 | struct inode *ip = NULL; | |
239 | ||
240 | while (ip == NULL) { | |
fc173c85 | 241 | ip = ilookup(zsb->z_sb, (unsigned long)id); |
ebe7e575 BB |
242 | if (ip) |
243 | break; | |
244 | ||
245 | /* May fail due to concurrent zfsctl_inode_alloc() */ | |
246 | ip = zfsctl_inode_alloc(zsb, id, fops, ops); | |
247 | } | |
248 | ||
249 | return (ip); | |
250 | } | |
251 | ||
/*
 * Free zfsctl inode specific structures.  There are currently none, so
 * this function intentionally does nothing with 'ip'.
 */
void
zfsctl_inode_destroy(struct inode *ip)
{
}
259 | ||
/*
 * An inode is being evicted from the cache.  Only inodes for which
 * zfsctl_is_snapdir() holds need work here; they are passed on to
 * zfsctl_snapdir_inactive().
 */
void
zfsctl_inode_inactive(struct inode *ip)
{
	if (!zfsctl_is_snapdir(ip))
		return;

	zfsctl_snapdir_inactive(ip);
}
269 | ||
270 | /* | |
271 | * Create the '.zfs' directory. This directory is cached as part of the VFS | |
272 | * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount() | |
273 | * therefore checks against a vfs_count of 2 instead of 1. This reference | |
274 | * is removed when the ctldir is destroyed in the unmount. All other entities | |
275 | * under the '.zfs' directory are created dynamically as needed. | |
fc173c85 BB |
276 | * |
277 | * Because the dynamically created '.zfs' directory entries assume the use | |
278 | * of 64-bit inode numbers this support must be disabled on 32-bit systems. | |
ebe7e575 BB |
279 | */ |
280 | int | |
281 | zfsctl_create(zfs_sb_t *zsb) | |
282 | { | |
fc173c85 | 283 | #if defined(CONFIG_64BIT) |
ebe7e575 BB |
284 | ASSERT(zsb->z_ctldir == NULL); |
285 | ||
286 | zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT, | |
287 | &zpl_fops_root, &zpl_ops_root); | |
288 | if (zsb->z_ctldir == NULL) | |
2e528b49 | 289 | return (SET_ERROR(ENOENT)); |
ebe7e575 BB |
290 | |
291 | return (0); | |
fc173c85 | 292 | #else |
2e528b49 | 293 | return (SET_ERROR(EOPNOTSUPP)); |
fc173c85 | 294 | #endif /* CONFIG_64BIT */ |
ebe7e575 BB |
295 | } |
296 | ||
297 | /* | |
298 | * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. | |
299 | */ | |
300 | void | |
301 | zfsctl_destroy(zfs_sb_t *zsb) | |
302 | { | |
303 | iput(zsb->z_ctldir); | |
304 | zsb->z_ctldir = NULL; | |
305 | } | |
306 | ||
307 | /* | |
308 | * Given a root znode, retrieve the associated .zfs directory. | |
309 | * Add a hold to the vnode and return it. | |
310 | */ | |
311 | struct inode * | |
312 | zfsctl_root(znode_t *zp) | |
313 | { | |
314 | ASSERT(zfs_has_ctldir(zp)); | |
315 | igrab(ZTOZSB(zp)->z_ctldir); | |
316 | return (ZTOZSB(zp)->z_ctldir); | |
317 | } | |
318 | ||
319 | /*ARGSUSED*/ | |
320 | int | |
321 | zfsctl_fid(struct inode *ip, fid_t *fidp) | |
322 | { | |
323 | znode_t *zp = ITOZ(ip); | |
324 | zfs_sb_t *zsb = ITOZSB(ip); | |
325 | uint64_t object = zp->z_id; | |
326 | zfid_short_t *zfid; | |
327 | int i; | |
328 | ||
329 | ZFS_ENTER(zsb); | |
330 | ||
331 | if (fidp->fid_len < SHORT_FID_LEN) { | |
332 | fidp->fid_len = SHORT_FID_LEN; | |
333 | ZFS_EXIT(zsb); | |
2e528b49 | 334 | return (SET_ERROR(ENOSPC)); |
ebe7e575 BB |
335 | } |
336 | ||
337 | zfid = (zfid_short_t *)fidp; | |
338 | ||
339 | zfid->zf_len = SHORT_FID_LEN; | |
340 | ||
341 | for (i = 0; i < sizeof (zfid->zf_object); i++) | |
342 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
343 | ||
344 | /* .zfs znodes always have a generation number of 0 */ | |
345 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
346 | zfid->zf_gen[i] = 0; | |
347 | ||
348 | ZFS_EXIT(zsb); | |
349 | return (0); | |
350 | } | |
351 | ||
352 | static int | |
353 | zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname) | |
354 | { | |
355 | objset_t *os = ITOZSB(ip)->z_os; | |
356 | ||
357 | if (snapshot_namecheck(name, NULL, NULL) != 0) | |
2e528b49 | 358 | return (SET_ERROR(EILSEQ)); |
ebe7e575 BB |
359 | |
360 | dmu_objset_name(os, zname); | |
361 | if ((strlen(zname) + 1 + strlen(name)) >= len) | |
2e528b49 | 362 | return (SET_ERROR(ENAMETOOLONG)); |
ebe7e575 BB |
363 | |
364 | (void) strcat(zname, "@"); | |
365 | (void) strcat(zname, name); | |
366 | ||
367 | return (0); | |
368 | } | |
369 | ||
e49f1e20 WA |
370 | /* |
371 | * Gets the full dataset name that corresponds to the given snapshot name | |
372 | * Example: | |
373 | * zfsctl_snapshot_zname("snap1") -> "mypool/myfs@snap1" | |
374 | */ | |
ebe7e575 BB |
375 | static int |
376 | zfsctl_snapshot_zpath(struct path *path, int len, char *zpath) | |
377 | { | |
378 | char *path_buffer, *path_ptr; | |
379 | int path_len, error = 0; | |
380 | ||
381 | path_buffer = kmem_alloc(len, KM_SLEEP); | |
382 | ||
383 | path_ptr = d_path(path, path_buffer, len); | |
384 | if (IS_ERR(path_ptr)) { | |
385 | error = -PTR_ERR(path_ptr); | |
386 | goto out; | |
387 | } | |
388 | ||
389 | path_len = path_buffer + len - 1 - path_ptr; | |
390 | if (path_len > len) { | |
2e528b49 | 391 | error = SET_ERROR(EFAULT); |
ebe7e575 BB |
392 | goto out; |
393 | } | |
394 | ||
395 | memcpy(zpath, path_ptr, path_len); | |
396 | zpath[path_len] = '\0'; | |
397 | out: | |
398 | kmem_free(path_buffer, len); | |
399 | ||
400 | return (error); | |
401 | } | |
402 | ||
403 | /* | |
404 | * Special case the handling of "..". | |
405 | */ | |
406 | /* ARGSUSED */ | |
407 | int | |
408 | zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp, | |
409 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
410 | { | |
411 | zfs_sb_t *zsb = ITOZSB(dip); | |
412 | int error = 0; | |
413 | ||
414 | ZFS_ENTER(zsb); | |
415 | ||
416 | if (strcmp(name, "..") == 0) { | |
417 | *ipp = dip->i_sb->s_root->d_inode; | |
418 | } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { | |
419 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR, | |
420 | &zpl_fops_snapdir, &zpl_ops_snapdir); | |
421 | } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { | |
422 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES, | |
423 | &zpl_fops_shares, &zpl_ops_shares); | |
424 | } else { | |
425 | *ipp = NULL; | |
426 | } | |
427 | ||
428 | if (*ipp == NULL) | |
2e528b49 | 429 | error = SET_ERROR(ENOENT); |
ebe7e575 BB |
430 | |
431 | ZFS_EXIT(zsb); | |
432 | ||
433 | return (error); | |
434 | } | |
435 | ||
436 | /* | |
437 | * Lookup entry point for the 'snapshot' directory. Try to open the | |
438 | * snapshot if it exist, creating the pseudo filesystem inode as necessary. | |
439 | * Perform a mount of the associated dataset on top of the inode. | |
440 | */ | |
441 | /* ARGSUSED */ | |
442 | int | |
443 | zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp, | |
444 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
445 | { | |
446 | zfs_sb_t *zsb = ITOZSB(dip); | |
447 | uint64_t id; | |
448 | int error; | |
449 | ||
450 | ZFS_ENTER(zsb); | |
451 | ||
6772fb67 | 452 | error = dmu_snapshot_lookup(zsb->z_os, name, &id); |
ebe7e575 BB |
453 | if (error) { |
454 | ZFS_EXIT(zsb); | |
455 | return (error); | |
456 | } | |
457 | ||
458 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id, | |
459 | &simple_dir_operations, &simple_dir_inode_operations); | |
460 | if (*ipp) { | |
461 | #ifdef HAVE_AUTOMOUNT | |
462 | (*ipp)->i_flags |= S_AUTOMOUNT; | |
463 | #endif /* HAVE_AUTOMOUNT */ | |
464 | } else { | |
2e528b49 | 465 | error = SET_ERROR(ENOENT); |
ebe7e575 BB |
466 | } |
467 | ||
468 | ZFS_EXIT(zsb); | |
469 | ||
470 | return (error); | |
471 | } | |
472 | ||
473 | static void | |
474 | zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name) | |
475 | { | |
476 | avl_index_t where; | |
477 | ||
478 | ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock)); | |
479 | ASSERT(sep != NULL); | |
480 | ||
481 | /* | |
482 | * Change the name in the AVL tree. | |
483 | */ | |
484 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
485 | (void) strcpy(sep->se_name, name); | |
486 | VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL); | |
487 | avl_insert(&zsb->z_ctldir_snaps, sep, where); | |
488 | } | |
489 | ||
490 | /* | |
491 | * Renaming a directory under '.zfs/snapshot' will automatically trigger | |
492 | * a rename of the snapshot to the new given name. The rename is confined | |
493 | * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. | |
494 | */ | |
495 | /*ARGSUSED*/ | |
496 | int | |
13fe0198 MA |
497 | zfsctl_snapdir_rename(struct inode *sdip, char *snm, |
498 | struct inode *tdip, char *tnm, cred_t *cr, int flags) | |
ebe7e575 BB |
499 | { |
500 | zfs_sb_t *zsb = ITOZSB(sdip); | |
501 | zfs_snapentry_t search, *sep; | |
502 | avl_index_t where; | |
13fe0198 | 503 | char *to, *from, *real, *fsname; |
ebe7e575 BB |
504 | int error; |
505 | ||
506 | ZFS_ENTER(zsb); | |
507 | ||
508 | to = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
509 | from = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
510 | real = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
13fe0198 | 511 | fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); |
ebe7e575 BB |
512 | |
513 | if (zsb->z_case == ZFS_CASE_INSENSITIVE) { | |
13fe0198 | 514 | error = dmu_snapshot_realname(zsb->z_os, snm, real, |
ebe7e575 BB |
515 | MAXNAMELEN, NULL); |
516 | if (error == 0) { | |
13fe0198 | 517 | snm = real; |
ebe7e575 BB |
518 | } else if (error != ENOTSUP) { |
519 | goto out; | |
520 | } | |
521 | } | |
522 | ||
13fe0198 MA |
523 | dmu_objset_name(zsb->z_os, fsname); |
524 | ||
525 | error = zfsctl_snapshot_zname(sdip, snm, MAXNAMELEN, from); | |
526 | if (error == 0) | |
527 | error = zfsctl_snapshot_zname(tdip, tnm, MAXNAMELEN, to); | |
528 | if (error == 0) | |
ebe7e575 | 529 | error = zfs_secpolicy_rename_perms(from, to, cr); |
13fe0198 | 530 | if (error != 0) |
ebe7e575 BB |
531 | goto out; |
532 | ||
533 | /* | |
534 | * Cannot move snapshots out of the snapdir. | |
535 | */ | |
536 | if (sdip != tdip) { | |
2e528b49 | 537 | error = SET_ERROR(EINVAL); |
ebe7e575 BB |
538 | goto out; |
539 | } | |
540 | ||
541 | /* | |
542 | * No-op when names are identical. | |
543 | */ | |
13fe0198 | 544 | if (strcmp(snm, tnm) == 0) { |
ebe7e575 BB |
545 | error = 0; |
546 | goto out; | |
547 | } | |
548 | ||
549 | mutex_enter(&zsb->z_ctldir_lock); | |
550 | ||
13fe0198 | 551 | error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); |
ebe7e575 BB |
552 | if (error) |
553 | goto out_unlock; | |
554 | ||
13fe0198 | 555 | search.se_name = (char *)snm; |
ebe7e575 BB |
556 | sep = avl_find(&zsb->z_ctldir_snaps, &search, &where); |
557 | if (sep) | |
13fe0198 | 558 | zfsctl_rename_snap(zsb, sep, tnm); |
ebe7e575 BB |
559 | |
560 | out_unlock: | |
561 | mutex_exit(&zsb->z_ctldir_lock); | |
562 | out: | |
563 | kmem_free(from, MAXNAMELEN); | |
564 | kmem_free(to, MAXNAMELEN); | |
565 | kmem_free(real, MAXNAMELEN); | |
13fe0198 | 566 | kmem_free(fsname, MAXNAMELEN); |
ebe7e575 BB |
567 | |
568 | ZFS_EXIT(zsb); | |
569 | ||
570 | return (error); | |
571 | } | |
572 | ||
573 | /* | |
574 | * Removing a directory under '.zfs/snapshot' will automatically trigger | |
575 | * the removal of the snapshot with the given name. | |
576 | */ | |
577 | /* ARGSUSED */ | |
578 | int | |
579 | zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) | |
580 | { | |
581 | zfs_sb_t *zsb = ITOZSB(dip); | |
582 | char *snapname, *real; | |
583 | int error; | |
584 | ||
585 | ZFS_ENTER(zsb); | |
586 | ||
587 | snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
588 | real = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
589 | ||
590 | if (zsb->z_case == ZFS_CASE_INSENSITIVE) { | |
591 | error = dmu_snapshot_realname(zsb->z_os, name, real, | |
592 | MAXNAMELEN, NULL); | |
593 | if (error == 0) { | |
594 | name = real; | |
595 | } else if (error != ENOTSUP) { | |
596 | goto out; | |
597 | } | |
598 | } | |
599 | ||
600 | error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname); | |
13fe0198 | 601 | if (error == 0) |
ebe7e575 | 602 | error = zfs_secpolicy_destroy_perms(snapname, cr); |
13fe0198 | 603 | if (error != 0) |
ebe7e575 BB |
604 | goto out; |
605 | ||
606 | error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE); | |
607 | if ((error == 0) || (error == ENOENT)) | |
13fe0198 | 608 | error = dsl_destroy_snapshot(snapname, B_FALSE); |
ebe7e575 BB |
609 | out: |
610 | kmem_free(snapname, MAXNAMELEN); | |
611 | kmem_free(real, MAXNAMELEN); | |
612 | ||
613 | ZFS_EXIT(zsb); | |
614 | ||
615 | return (error); | |
616 | } | |
617 | ||
618 | /* | |
619 | * Creating a directory under '.zfs/snapshot' will automatically trigger | |
620 | * the creation of a new snapshot with the given name. | |
621 | */ | |
622 | /* ARGSUSED */ | |
623 | int | |
624 | zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, | |
625 | struct inode **ipp, cred_t *cr, int flags) | |
626 | { | |
627 | zfs_sb_t *zsb = ITOZSB(dip); | |
628 | char *dsname; | |
629 | int error; | |
630 | ||
631 | dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
632 | ||
633 | if (snapshot_namecheck(dirname, NULL, NULL) != 0) { | |
2e528b49 | 634 | error = SET_ERROR(EILSEQ); |
ebe7e575 BB |
635 | goto out; |
636 | } | |
637 | ||
638 | dmu_objset_name(zsb->z_os, dsname); | |
639 | ||
640 | error = zfs_secpolicy_snapshot_perms(dsname, cr); | |
13fe0198 | 641 | if (error != 0) |
ebe7e575 BB |
642 | goto out; |
643 | ||
644 | if (error == 0) { | |
6f1ffb06 | 645 | error = dmu_objset_snapshot_one(dsname, dirname); |
13fe0198 | 646 | if (error != 0) |
ebe7e575 BB |
647 | goto out; |
648 | ||
649 | error = zfsctl_snapdir_lookup(dip, dirname, ipp, | |
650 | 0, cr, NULL, NULL); | |
651 | } | |
652 | out: | |
653 | kmem_free(dsname, MAXNAMELEN); | |
654 | ||
655 | return (error); | |
656 | } | |
657 | ||
658 | /* | |
659 | * When a .zfs/snapshot/<snapshot> inode is evicted they must be removed | |
660 | * from the snapshot list. This will normally happen as part of the auto | |
661 | * unmount, however in the case of a manual snapshot unmount this will be | |
662 | * the only notification we receive. | |
663 | */ | |
664 | void | |
665 | zfsctl_snapdir_inactive(struct inode *ip) | |
666 | { | |
667 | zfs_sb_t *zsb = ITOZSB(ip); | |
668 | zfs_snapentry_t *sep, *next; | |
669 | ||
670 | mutex_enter(&zsb->z_ctldir_lock); | |
671 | ||
672 | sep = avl_first(&zsb->z_ctldir_snaps); | |
673 | while (sep != NULL) { | |
674 | next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
675 | ||
676 | if (sep->se_inode == ip) { | |
677 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
2ae10319 | 678 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
ebe7e575 BB |
679 | zfsctl_sep_free(sep); |
680 | break; | |
681 | } | |
682 | sep = next; | |
683 | } | |
684 | ||
685 | mutex_exit(&zsb->z_ctldir_lock); | |
686 | } | |
687 | ||
688 | /* | |
689 | * Attempt to unmount a snapshot by making a call to user space. | |
690 | * There is no assurance that this can or will succeed, is just a | |
691 | * best effort. In the case where it does fail, perhaps because | |
692 | * it's in use, the unmount will fail harmlessly. | |
693 | */ | |
fd4f7616 | 694 | #define SET_UNMOUNT_CMD \ |
ebe7e575 BB |
695 | "exec 0</dev/null " \ |
696 | " 1>/dev/null " \ | |
697 | " 2>/dev/null; " \ | |
94a9bb47 | 698 | "umount -t zfs -n %s'%s'" |
ebe7e575 BB |
699 | |
700 | static int | |
701 | __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags) | |
702 | { | |
703 | char *argv[] = { "/bin/sh", "-c", NULL, NULL }; | |
704 | char *envp[] = { NULL }; | |
705 | int error; | |
706 | ||
707 | argv[2] = kmem_asprintf(SET_UNMOUNT_CMD, | |
708 | flags & MNT_FORCE ? "-f " : "", sep->se_path); | |
761394b3 | 709 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
ebe7e575 BB |
710 | strfree(argv[2]); |
711 | ||
712 | /* | |
713 | * The umount system utility will return 256 on error. We must | |
714 | * assume this error is because the file system is busy so it is | |
715 | * converted to the more sensible EBUSY. | |
716 | */ | |
717 | if (error) | |
2e528b49 | 718 | error = SET_ERROR(EBUSY); |
ebe7e575 BB |
719 | |
720 | /* | |
721 | * This was the result of a manual unmount, cancel the delayed work | |
722 | * to prevent zfsctl_expire_snapshot() from attempting a unmount. | |
723 | */ | |
724 | if ((error == 0) && !(flags & MNT_EXPIRE)) | |
2ae10319 BB |
725 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
726 | ||
ebe7e575 BB |
727 | |
728 | return (error); | |
729 | } | |
730 | ||
731 | int | |
732 | zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags) | |
733 | { | |
734 | zfs_snapentry_t search; | |
735 | zfs_snapentry_t *sep; | |
736 | int error = 0; | |
737 | ||
738 | mutex_enter(&zsb->z_ctldir_lock); | |
739 | ||
740 | search.se_name = name; | |
741 | sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); | |
742 | if (sep) { | |
743 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
76351672 BB |
744 | mutex_exit(&zsb->z_ctldir_lock); |
745 | ||
ebe7e575 | 746 | error = __zfsctl_unmount_snapshot(sep, flags); |
76351672 BB |
747 | |
748 | mutex_enter(&zsb->z_ctldir_lock); | |
ebe7e575 BB |
749 | if (error == EBUSY) |
750 | avl_add(&zsb->z_ctldir_snaps, sep); | |
751 | else | |
752 | zfsctl_sep_free(sep); | |
753 | } else { | |
2e528b49 | 754 | error = SET_ERROR(ENOENT); |
ebe7e575 BB |
755 | } |
756 | ||
757 | mutex_exit(&zsb->z_ctldir_lock); | |
758 | ASSERT3S(error, >=, 0); | |
759 | ||
760 | return (error); | |
761 | } | |
762 | ||
763 | /* | |
764 | * Traverse all mounted snapshots and attempt to unmount them. This | |
765 | * is best effort, on failure EEXIST is returned and count will be set | |
766 | * to the number of file snapshots which could not be unmounted. | |
767 | */ | |
768 | int | |
769 | zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count) | |
770 | { | |
771 | zfs_snapentry_t *sep, *next; | |
772 | int error = 0; | |
773 | ||
774 | *count = 0; | |
775 | ||
776 | ASSERT(zsb->z_ctldir != NULL); | |
777 | mutex_enter(&zsb->z_ctldir_lock); | |
778 | ||
779 | sep = avl_first(&zsb->z_ctldir_snaps); | |
780 | while (sep != NULL) { | |
781 | next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
782 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
76351672 BB |
783 | mutex_exit(&zsb->z_ctldir_lock); |
784 | ||
ebe7e575 | 785 | error = __zfsctl_unmount_snapshot(sep, flags); |
76351672 BB |
786 | |
787 | mutex_enter(&zsb->z_ctldir_lock); | |
ebe7e575 BB |
788 | if (error == EBUSY) { |
789 | avl_add(&zsb->z_ctldir_snaps, sep); | |
790 | (*count)++; | |
791 | } else { | |
792 | zfsctl_sep_free(sep); | |
793 | } | |
794 | ||
795 | sep = next; | |
796 | } | |
797 | ||
798 | mutex_exit(&zsb->z_ctldir_lock); | |
799 | ||
800 | return ((*count > 0) ? EEXIST : 0); | |
801 | } | |
802 | ||
fd4f7616 TC |
803 | #define MOUNT_BUSY 0x80 /* Mount failed due to EBUSY (from mntent.h) */ |
804 | ||
805 | #define SET_MOUNT_CMD \ | |
ebe7e575 BB |
806 | "exec 0</dev/null " \ |
807 | " 1>/dev/null " \ | |
808 | " 2>/dev/null; " \ | |
c7dfc086 | 809 | "mount -t zfs -n '%s' '%s'" |
ebe7e575 BB |
810 | |
811 | int | |
812 | zfsctl_mount_snapshot(struct path *path, int flags) | |
813 | { | |
814 | struct dentry *dentry = path->dentry; | |
815 | struct inode *ip = dentry->d_inode; | |
816 | zfs_sb_t *zsb = ITOZSB(ip); | |
817 | char *full_name, *full_path; | |
818 | zfs_snapentry_t *sep; | |
819 | zfs_snapentry_t search; | |
820 | char *argv[] = { "/bin/sh", "-c", NULL, NULL }; | |
821 | char *envp[] = { NULL }; | |
822 | int error; | |
823 | ||
824 | ZFS_ENTER(zsb); | |
825 | ||
826 | full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); | |
827 | full_path = kmem_zalloc(PATH_MAX, KM_SLEEP); | |
828 | ||
829 | error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name); | |
830 | if (error) | |
831 | goto error; | |
832 | ||
833 | error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path); | |
834 | if (error) | |
835 | goto error; | |
836 | ||
837 | /* | |
838 | * Attempt to mount the snapshot from user space. Normally this | |
839 | * would be done using the vfs_kern_mount() function, however that | |
840 | * function is marked GPL-only and cannot be used. On error we | |
841 | * careful to log the real error to the console and return EISDIR | |
842 | * to safely abort the automount. This should be very rare. | |
fd4f7616 TC |
843 | * |
844 | * If the user mode helper happens to return EBUSY, a concurrent | |
845 | * mount is already in progress in which case the error is ignored. | |
846 | * Take note that if the program was executed successfully the return | |
847 | * value from call_usermodehelper() will be (exitcode << 8 + signal). | |
ebe7e575 BB |
848 | */ |
849 | argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path); | |
761394b3 | 850 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
ebe7e575 | 851 | strfree(argv[2]); |
fd4f7616 | 852 | if (error && !(error & MOUNT_BUSY << 8)) { |
ebe7e575 BB |
853 | printk("ZFS: Unable to automount %s at %s: %d\n", |
854 | full_name, full_path, error); | |
2e528b49 | 855 | error = SET_ERROR(EISDIR); |
ebe7e575 BB |
856 | goto error; |
857 | } | |
858 | ||
fd4f7616 | 859 | error = 0; |
ebe7e575 BB |
860 | mutex_enter(&zsb->z_ctldir_lock); |
861 | ||
862 | /* | |
863 | * Ensure a previous entry does not exist, if it does safely remove | |
864 | * it any cancel the outstanding expiration. This can occur when a | |
865 | * snapshot is manually unmounted and then an automount is triggered. | |
866 | */ | |
867 | search.se_name = full_name; | |
868 | sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); | |
869 | if (sep) { | |
870 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
2ae10319 | 871 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
ebe7e575 BB |
872 | zfsctl_sep_free(sep); |
873 | } | |
874 | ||
875 | sep = zfsctl_sep_alloc(); | |
876 | sep->se_name = full_name; | |
877 | sep->se_path = full_path; | |
878 | sep->se_inode = ip; | |
879 | avl_add(&zsb->z_ctldir_snaps, sep); | |
880 | ||
2ae10319 BB |
881 | sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq, |
882 | zfsctl_expire_snapshot, sep, TQ_SLEEP, | |
883 | ddi_get_lbolt() + zfs_expire_snapshot * HZ); | |
ebe7e575 BB |
884 | |
885 | mutex_exit(&zsb->z_ctldir_lock); | |
886 | error: | |
887 | if (error) { | |
888 | kmem_free(full_name, MAXNAMELEN); | |
889 | kmem_free(full_path, PATH_MAX); | |
890 | } | |
891 | ||
892 | ZFS_EXIT(zsb); | |
893 | ||
894 | return (error); | |
895 | } | |
896 | ||
897 | /* | |
898 | * Check if this super block has a matching objset id. | |
899 | */ | |
900 | static int | |
901 | zfsctl_test_super(struct super_block *sb, void *objsetidp) | |
902 | { | |
903 | zfs_sb_t *zsb = sb->s_fs_info; | |
904 | uint64_t objsetid = *(uint64_t *)objsetidp; | |
905 | ||
906 | return (dmu_objset_id(zsb->z_os) == objsetid); | |
907 | } | |
908 | ||
909 | /* | |
910 | * Prevent a new super block from being allocated if an existing one | |
911 | * could not be located. We only want to preform a lookup operation. | |
912 | */ | |
913 | static int | |
914 | zfsctl_set_super(struct super_block *sb, void *objsetidp) | |
915 | { | |
916 | return (-EEXIST); | |
917 | } | |
918 | ||
919 | int | |
920 | zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp) | |
921 | { | |
922 | zfs_sb_t *zsb = sb->s_fs_info; | |
923 | struct super_block *sbp; | |
924 | zfs_snapentry_t *sep; | |
925 | uint64_t id; | |
926 | int error; | |
927 | ||
928 | ASSERT(zsb->z_ctldir != NULL); | |
929 | ||
930 | mutex_enter(&zsb->z_ctldir_lock); | |
931 | ||
932 | /* | |
933 | * Verify that the snapshot is mounted. | |
934 | */ | |
935 | sep = avl_first(&zsb->z_ctldir_snaps); | |
936 | while (sep != NULL) { | |
6772fb67 | 937 | error = dmu_snapshot_lookup(zsb->z_os, sep->se_name, &id); |
ebe7e575 BB |
938 | if (error) |
939 | goto out; | |
940 | ||
941 | if (id == objsetid) | |
942 | break; | |
943 | ||
944 | sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
945 | } | |
946 | ||
947 | if (sep != NULL) { | |
948 | /* | |
949 | * Lookup the mounted root rather than the covered mount | |
950 | * point. This may fail if the snapshot has just been | |
951 | * unmounted by an unrelated user space process. This | |
952 | * race cannot occur to an expired mount point because | |
953 | * we hold the zsb->z_ctldir_lock to prevent the race. | |
954 | */ | |
3c203610 YS |
955 | sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super, |
956 | zfsctl_set_super, 0, &id); | |
ebe7e575 BB |
957 | if (IS_ERR(sbp)) { |
958 | error = -PTR_ERR(sbp); | |
959 | } else { | |
960 | *zsbp = sbp->s_fs_info; | |
961 | deactivate_super(sbp); | |
962 | } | |
963 | } else { | |
2e528b49 | 964 | error = SET_ERROR(EINVAL); |
ebe7e575 BB |
965 | } |
966 | out: | |
967 | mutex_exit(&zsb->z_ctldir_lock); | |
968 | ASSERT3S(error, >=, 0); | |
969 | ||
970 | return (error); | |
971 | } | |
972 | ||
973 | /* ARGSUSED */ | |
974 | int | |
975 | zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, | |
976 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
977 | { | |
978 | zfs_sb_t *zsb = ITOZSB(dip); | |
979 | struct inode *ip; | |
980 | znode_t *dzp; | |
981 | int error; | |
982 | ||
983 | ZFS_ENTER(zsb); | |
984 | ||
985 | if (zsb->z_shares_dir == 0) { | |
986 | ZFS_EXIT(zsb); | |
2e528b49 | 987 | return (SET_ERROR(ENOTSUP)); |
ebe7e575 BB |
988 | } |
989 | ||
990 | error = zfs_zget(zsb, zsb->z_shares_dir, &dzp); | |
991 | if (error) { | |
992 | ZFS_EXIT(zsb); | |
993 | return (error); | |
994 | } | |
995 | ||
996 | error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); | |
997 | ||
998 | iput(ZTOI(dzp)); | |
999 | ZFS_EXIT(zsb); | |
1000 | ||
1001 | return (error); | |
1002 | } | |
1003 | ||
1004 | ||
/*
 * Initialize the various pieces we'll need to create and manipulate .zfs
 * directories.  At present this creates the "z_unmount" task queue, kept
 * in zfs_expire_taskq, which this file uses to run snapshot expiration.
 */
void
zfsctl_init(void)
{
	zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri,
	    1, 8, TASKQ_PREPOPULATE);
}
1015 | ||
/*
 * Cleanup the various pieces we needed for .zfs directories.  This
 * destroys the snapshot expiration task queue created by zfsctl_init().
 */
void
zfsctl_fini(void)
{
	taskq_destroy(zfs_expire_taskq);
}
1025 | ||
/* Expose zfs_expire_snapshot as an integer module parameter (mode 0644). */
module_param(zfs_expire_snapshot, int, 0644);
MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");