]>
Commit | Line | Data |
---|---|---|
ebe7e575 BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * | |
23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Copyright (C) 2011 Lawrence Livermore National Security, LLC. | |
25 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
26 | * LLNL-CODE-403049. | |
27 | * Rewritten for Linux by: | |
28 | * Rohan Puri <rohan.puri15@gmail.com> | |
29 | * Brian Behlendorf <behlendorf1@llnl.gov> | |
30 | */ | |
31 | ||
32 | /* | |
33 | * ZFS control directory (a.k.a. ".zfs") | |
34 | * | |
35 | * This directory provides a common location for all ZFS meta-objects. | |
36 | * Currently, this is only the 'snapshot' and 'shares' directory, but this may | |
37 | * expand in the future. The elements are built dynamically, as the hierarchy | |
38 | * does not actually exist on disk. | |
39 | * | |
40 | * For 'snapshot', we don't want to have all snapshots always mounted, because | |
41 | * this would take up a huge amount of space in /etc/mnttab. We have three | |
42 | * types of objects: | |
43 | * | |
44 | * ctldir ------> snapshotdir -------> snapshot | |
45 | * | | |
46 | * | | |
47 | * V | |
48 | * mounted fs | |
49 | * | |
50 | * The 'snapshot' node contains just enough information to lookup '..' and act | |
51 | * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we | |
52 | * perform an automount of the underlying filesystem and return the | |
53 | * corresponding inode. | |
54 | * | |
55 | * All mounts are handled automatically by a user mode helper which invokes | |
56 | * the mount procedure. Unmounts are handled by allowing the mount | |
57 | * point to expire so the kernel may automatically unmount it. | |
58 | * | |
59 | * The '.zfs', '.zfs/snapshot', and all directories created under | |
60 | * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same | |
61 | * zfs_sb_t as the head filesystem (what '.zfs' lives under). | |
62 | * | |
63 | * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths | |
64 | * (ie: snapshots) are complete ZFS filesystems and have their own unique | |
65 | * zfs_sb_t. However, the fsid reported by these mounts will be the same | |
66 | * as that used by the parent zfs_sb_t to make NFS happy. | |
67 | */ | |
68 | ||
69 | #include <sys/types.h> | |
70 | #include <sys/param.h> | |
71 | #include <sys/time.h> | |
72 | #include <sys/systm.h> | |
73 | #include <sys/sysmacros.h> | |
74 | #include <sys/pathname.h> | |
75 | #include <sys/vfs.h> | |
76 | #include <sys/vfs_opreg.h> | |
77 | #include <sys/zfs_ctldir.h> | |
78 | #include <sys/zfs_ioctl.h> | |
79 | #include <sys/zfs_vfsops.h> | |
80 | #include <sys/zfs_vnops.h> | |
81 | #include <sys/stat.h> | |
82 | #include <sys/dmu.h> | |
13fe0198 | 83 | #include <sys/dsl_destroy.h> |
ebe7e575 BB |
84 | #include <sys/dsl_deleg.h> |
85 | #include <sys/mount.h> | |
86 | #include <sys/zpl.h> | |
87 | #include "zfs_namecheck.h" | |
88 | ||
89 | /* | |
90 | * Control Directory Tunables (.zfs) | |
91 | */ | |
92 | int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; | |
93 | ||
2ae10319 BB |
94 | /* |
95 | * Dedicated task queue for unmounting snapshots. | |
96 | */ | |
97 | static taskq_t *zfs_expire_taskq; | |
98 | ||
ebe7e575 BB |
99 | static zfs_snapentry_t * |
100 | zfsctl_sep_alloc(void) | |
101 | { | |
102 | return kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); | |
103 | } | |
104 | ||
105 | void | |
106 | zfsctl_sep_free(zfs_snapentry_t *sep) | |
107 | { | |
108 | kmem_free(sep->se_name, MAXNAMELEN); | |
109 | kmem_free(sep->se_path, PATH_MAX); | |
110 | kmem_free(sep, sizeof (zfs_snapentry_t)); | |
111 | } | |
112 | ||
113 | /* | |
114 | * Attempt to expire an automounted snapshot, unmounts are attempted every | |
115 | * 'zfs_expire_snapshot' seconds until they succeed. The work request is | |
116 | * responsible for rescheduling itself and freeing the zfs_expire_snapshot_t. | |
117 | */ | |
118 | static void | |
119 | zfsctl_expire_snapshot(void *data) | |
120 | { | |
2ae10319 BB |
121 | zfs_snapentry_t *sep = (zfs_snapentry_t *)data; |
122 | zfs_sb_t *zsb = ITOZSB(sep->se_inode); | |
ebe7e575 BB |
123 | int error; |
124 | ||
ebe7e575 BB |
125 | error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE); |
126 | if (error == EBUSY) | |
2ae10319 BB |
127 | sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq, |
128 | zfsctl_expire_snapshot, sep, TQ_SLEEP, | |
129 | ddi_get_lbolt() + zfs_expire_snapshot * HZ); | |
ebe7e575 BB |
130 | } |
131 | ||
132 | int | |
133 | snapentry_compare(const void *a, const void *b) | |
134 | { | |
135 | const zfs_snapentry_t *sa = a; | |
136 | const zfs_snapentry_t *sb = b; | |
137 | int ret = strcmp(sa->se_name, sb->se_name); | |
138 | ||
139 | if (ret < 0) | |
140 | return (-1); | |
141 | else if (ret > 0) | |
142 | return (1); | |
143 | else | |
144 | return (0); | |
145 | } | |
146 | ||
147 | boolean_t | |
148 | zfsctl_is_node(struct inode *ip) | |
149 | { | |
150 | return (ITOZ(ip)->z_is_ctldir); | |
151 | } | |
152 | ||
153 | boolean_t | |
154 | zfsctl_is_snapdir(struct inode *ip) | |
155 | { | |
156 | return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); | |
157 | } | |
158 | ||
159 | /* | |
160 | * Allocate a new inode with the passed id and ops. | |
161 | */ | |
162 | static struct inode * | |
163 | zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id, | |
164 | const struct file_operations *fops, const struct inode_operations *ops) | |
165 | { | |
166 | struct timespec now = current_fs_time(zsb->z_sb); | |
167 | struct inode *ip; | |
168 | znode_t *zp; | |
169 | ||
170 | ip = new_inode(zsb->z_sb); | |
171 | if (ip == NULL) | |
172 | return (NULL); | |
173 | ||
174 | zp = ITOZ(ip); | |
175 | ASSERT3P(zp->z_dirlocks, ==, NULL); | |
176 | ASSERT3P(zp->z_acl_cached, ==, NULL); | |
177 | ASSERT3P(zp->z_xattr_cached, ==, NULL); | |
178 | zp->z_id = id; | |
179 | zp->z_unlinked = 0; | |
180 | zp->z_atime_dirty = 0; | |
181 | zp->z_zn_prefetch = 0; | |
182 | zp->z_moved = 0; | |
183 | zp->z_sa_hdl = NULL; | |
184 | zp->z_blksz = 0; | |
185 | zp->z_seq = 0; | |
186 | zp->z_mapcnt = 0; | |
187 | zp->z_gen = 0; | |
188 | zp->z_size = 0; | |
189 | zp->z_atime[0] = 0; | |
190 | zp->z_atime[1] = 0; | |
191 | zp->z_links = 0; | |
192 | zp->z_pflags = 0; | |
193 | zp->z_uid = 0; | |
194 | zp->z_gid = 0; | |
195 | zp->z_mode = 0; | |
196 | zp->z_sync_cnt = 0; | |
197 | zp->z_is_zvol = B_FALSE; | |
198 | zp->z_is_mapped = B_FALSE; | |
199 | zp->z_is_ctldir = B_TRUE; | |
200 | zp->z_is_sa = B_FALSE; | |
7b3e34ba | 201 | zp->z_is_stale = B_FALSE; |
ebe7e575 BB |
202 | ip->i_ino = id; |
203 | ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO); | |
570d6edf RY |
204 | ip->i_uid = SUID_TO_KUID(0); |
205 | ip->i_gid = SGID_TO_KGID(0); | |
ebe7e575 BB |
206 | ip->i_blkbits = SPA_MINBLOCKSHIFT; |
207 | ip->i_atime = now; | |
208 | ip->i_mtime = now; | |
209 | ip->i_ctime = now; | |
210 | ip->i_fop = fops; | |
211 | ip->i_op = ops; | |
212 | ||
213 | if (insert_inode_locked(ip)) { | |
214 | unlock_new_inode(ip); | |
215 | iput(ip); | |
216 | return (NULL); | |
217 | } | |
218 | ||
219 | mutex_enter(&zsb->z_znodes_lock); | |
220 | list_insert_tail(&zsb->z_all_znodes, zp); | |
9ed86e7c | 221 | zsb->z_nr_znodes++; |
ebe7e575 BB |
222 | membar_producer(); |
223 | mutex_exit(&zsb->z_znodes_lock); | |
224 | ||
225 | unlock_new_inode(ip); | |
226 | ||
227 | return (ip); | |
228 | } | |
229 | ||
230 | /* | |
231 | * Lookup the inode with given id, it will be allocated if needed. | |
232 | */ | |
233 | static struct inode * | |
fc173c85 | 234 | zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id, |
ebe7e575 BB |
235 | const struct file_operations *fops, const struct inode_operations *ops) |
236 | { | |
237 | struct inode *ip = NULL; | |
238 | ||
239 | while (ip == NULL) { | |
fc173c85 | 240 | ip = ilookup(zsb->z_sb, (unsigned long)id); |
ebe7e575 BB |
241 | if (ip) |
242 | break; | |
243 | ||
244 | /* May fail due to concurrent zfsctl_inode_alloc() */ | |
245 | ip = zfsctl_inode_alloc(zsb, id, fops, ops); | |
246 | } | |
247 | ||
248 | return (ip); | |
249 | } | |
250 | ||
/*
 * Hook for releasing zfsctl specific inode state.  There is no such state
 * at present, so this is intentionally a no-op.
 */
void
zfsctl_inode_destroy(struct inode *ip)
{
}
259 | ||
/*
 * Called when a control directory inode is evicted from the cache.  Only
 * inodes for which zfsctl_is_snapdir() is true need additional cleanup.
 */
void
zfsctl_inode_inactive(struct inode *ip)
{
	if (!zfsctl_is_snapdir(ip))
		return;

	zfsctl_snapdir_inactive(ip);
}
269 | ||
270 | /* | |
271 | * Create the '.zfs' directory. This directory is cached as part of the VFS | |
272 | * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount() | |
273 | * therefore checks against a vfs_count of 2 instead of 1. This reference | |
274 | * is removed when the ctldir is destroyed in the unmount. All other entities | |
275 | * under the '.zfs' directory are created dynamically as needed. | |
fc173c85 BB |
276 | * |
277 | * Because the dynamically created '.zfs' directory entries assume the use | |
278 | * of 64-bit inode numbers this support must be disabled on 32-bit systems. | |
ebe7e575 BB |
279 | */ |
280 | int | |
281 | zfsctl_create(zfs_sb_t *zsb) | |
282 | { | |
fc173c85 | 283 | #if defined(CONFIG_64BIT) |
ebe7e575 BB |
284 | ASSERT(zsb->z_ctldir == NULL); |
285 | ||
286 | zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT, | |
287 | &zpl_fops_root, &zpl_ops_root); | |
288 | if (zsb->z_ctldir == NULL) | |
289 | return (ENOENT); | |
290 | ||
291 | return (0); | |
fc173c85 BB |
292 | #else |
293 | return (EOPNOTSUPP); | |
294 | #endif /* CONFIG_64BIT */ | |
ebe7e575 BB |
295 | } |
296 | ||
297 | /* | |
298 | * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. | |
299 | */ | |
300 | void | |
301 | zfsctl_destroy(zfs_sb_t *zsb) | |
302 | { | |
303 | iput(zsb->z_ctldir); | |
304 | zsb->z_ctldir = NULL; | |
305 | } | |
306 | ||
307 | /* | |
308 | * Given a root znode, retrieve the associated .zfs directory. | |
309 | * Add a hold to the vnode and return it. | |
310 | */ | |
311 | struct inode * | |
312 | zfsctl_root(znode_t *zp) | |
313 | { | |
314 | ASSERT(zfs_has_ctldir(zp)); | |
315 | igrab(ZTOZSB(zp)->z_ctldir); | |
316 | return (ZTOZSB(zp)->z_ctldir); | |
317 | } | |
318 | ||
319 | /*ARGSUSED*/ | |
320 | int | |
321 | zfsctl_fid(struct inode *ip, fid_t *fidp) | |
322 | { | |
323 | znode_t *zp = ITOZ(ip); | |
324 | zfs_sb_t *zsb = ITOZSB(ip); | |
325 | uint64_t object = zp->z_id; | |
326 | zfid_short_t *zfid; | |
327 | int i; | |
328 | ||
329 | ZFS_ENTER(zsb); | |
330 | ||
331 | if (fidp->fid_len < SHORT_FID_LEN) { | |
332 | fidp->fid_len = SHORT_FID_LEN; | |
333 | ZFS_EXIT(zsb); | |
334 | return (ENOSPC); | |
335 | } | |
336 | ||
337 | zfid = (zfid_short_t *)fidp; | |
338 | ||
339 | zfid->zf_len = SHORT_FID_LEN; | |
340 | ||
341 | for (i = 0; i < sizeof (zfid->zf_object); i++) | |
342 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
343 | ||
344 | /* .zfs znodes always have a generation number of 0 */ | |
345 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
346 | zfid->zf_gen[i] = 0; | |
347 | ||
348 | ZFS_EXIT(zsb); | |
349 | return (0); | |
350 | } | |
351 | ||
352 | static int | |
353 | zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname) | |
354 | { | |
355 | objset_t *os = ITOZSB(ip)->z_os; | |
356 | ||
357 | if (snapshot_namecheck(name, NULL, NULL) != 0) | |
358 | return (EILSEQ); | |
359 | ||
360 | dmu_objset_name(os, zname); | |
361 | if ((strlen(zname) + 1 + strlen(name)) >= len) | |
362 | return (ENAMETOOLONG); | |
363 | ||
364 | (void) strcat(zname, "@"); | |
365 | (void) strcat(zname, name); | |
366 | ||
367 | return (0); | |
368 | } | |
369 | ||
370 | static int | |
371 | zfsctl_snapshot_zpath(struct path *path, int len, char *zpath) | |
372 | { | |
373 | char *path_buffer, *path_ptr; | |
374 | int path_len, error = 0; | |
375 | ||
376 | path_buffer = kmem_alloc(len, KM_SLEEP); | |
377 | ||
378 | path_ptr = d_path(path, path_buffer, len); | |
379 | if (IS_ERR(path_ptr)) { | |
380 | error = -PTR_ERR(path_ptr); | |
381 | goto out; | |
382 | } | |
383 | ||
384 | path_len = path_buffer + len - 1 - path_ptr; | |
385 | if (path_len > len) { | |
386 | error = EFAULT; | |
387 | goto out; | |
388 | } | |
389 | ||
390 | memcpy(zpath, path_ptr, path_len); | |
391 | zpath[path_len] = '\0'; | |
392 | out: | |
393 | kmem_free(path_buffer, len); | |
394 | ||
395 | return (error); | |
396 | } | |
397 | ||
398 | /* | |
399 | * Special case the handling of "..". | |
400 | */ | |
401 | /* ARGSUSED */ | |
402 | int | |
403 | zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp, | |
404 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
405 | { | |
406 | zfs_sb_t *zsb = ITOZSB(dip); | |
407 | int error = 0; | |
408 | ||
409 | ZFS_ENTER(zsb); | |
410 | ||
411 | if (strcmp(name, "..") == 0) { | |
412 | *ipp = dip->i_sb->s_root->d_inode; | |
413 | } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { | |
414 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR, | |
415 | &zpl_fops_snapdir, &zpl_ops_snapdir); | |
416 | } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { | |
417 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES, | |
418 | &zpl_fops_shares, &zpl_ops_shares); | |
419 | } else { | |
420 | *ipp = NULL; | |
421 | } | |
422 | ||
423 | if (*ipp == NULL) | |
424 | error = ENOENT; | |
425 | ||
426 | ZFS_EXIT(zsb); | |
427 | ||
428 | return (error); | |
429 | } | |
430 | ||
431 | /* | |
432 | * Lookup entry point for the 'snapshot' directory. Try to open the | |
433 | * snapshot if it exist, creating the pseudo filesystem inode as necessary. | |
434 | * Perform a mount of the associated dataset on top of the inode. | |
435 | */ | |
436 | /* ARGSUSED */ | |
437 | int | |
438 | zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp, | |
439 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
440 | { | |
441 | zfs_sb_t *zsb = ITOZSB(dip); | |
442 | uint64_t id; | |
443 | int error; | |
444 | ||
445 | ZFS_ENTER(zsb); | |
446 | ||
6772fb67 | 447 | error = dmu_snapshot_lookup(zsb->z_os, name, &id); |
ebe7e575 BB |
448 | if (error) { |
449 | ZFS_EXIT(zsb); | |
450 | return (error); | |
451 | } | |
452 | ||
453 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id, | |
454 | &simple_dir_operations, &simple_dir_inode_operations); | |
455 | if (*ipp) { | |
456 | #ifdef HAVE_AUTOMOUNT | |
457 | (*ipp)->i_flags |= S_AUTOMOUNT; | |
458 | #endif /* HAVE_AUTOMOUNT */ | |
459 | } else { | |
460 | error = ENOENT; | |
461 | } | |
462 | ||
463 | ZFS_EXIT(zsb); | |
464 | ||
465 | return (error); | |
466 | } | |
467 | ||
468 | static void | |
469 | zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name) | |
470 | { | |
471 | avl_index_t where; | |
472 | ||
473 | ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock)); | |
474 | ASSERT(sep != NULL); | |
475 | ||
476 | /* | |
477 | * Change the name in the AVL tree. | |
478 | */ | |
479 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
480 | (void) strcpy(sep->se_name, name); | |
481 | VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL); | |
482 | avl_insert(&zsb->z_ctldir_snaps, sep, where); | |
483 | } | |
484 | ||
485 | /* | |
486 | * Renaming a directory under '.zfs/snapshot' will automatically trigger | |
487 | * a rename of the snapshot to the new given name. The rename is confined | |
488 | * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. | |
489 | */ | |
490 | /*ARGSUSED*/ | |
491 | int | |
13fe0198 MA |
492 | zfsctl_snapdir_rename(struct inode *sdip, char *snm, |
493 | struct inode *tdip, char *tnm, cred_t *cr, int flags) | |
ebe7e575 BB |
494 | { |
495 | zfs_sb_t *zsb = ITOZSB(sdip); | |
496 | zfs_snapentry_t search, *sep; | |
497 | avl_index_t where; | |
13fe0198 | 498 | char *to, *from, *real, *fsname; |
ebe7e575 BB |
499 | int error; |
500 | ||
501 | ZFS_ENTER(zsb); | |
502 | ||
503 | to = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
504 | from = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
505 | real = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
13fe0198 | 506 | fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); |
ebe7e575 BB |
507 | |
508 | if (zsb->z_case == ZFS_CASE_INSENSITIVE) { | |
13fe0198 | 509 | error = dmu_snapshot_realname(zsb->z_os, snm, real, |
ebe7e575 BB |
510 | MAXNAMELEN, NULL); |
511 | if (error == 0) { | |
13fe0198 | 512 | snm = real; |
ebe7e575 BB |
513 | } else if (error != ENOTSUP) { |
514 | goto out; | |
515 | } | |
516 | } | |
517 | ||
13fe0198 MA |
518 | dmu_objset_name(zsb->z_os, fsname); |
519 | ||
520 | error = zfsctl_snapshot_zname(sdip, snm, MAXNAMELEN, from); | |
521 | if (error == 0) | |
522 | error = zfsctl_snapshot_zname(tdip, tnm, MAXNAMELEN, to); | |
523 | if (error == 0) | |
ebe7e575 | 524 | error = zfs_secpolicy_rename_perms(from, to, cr); |
13fe0198 | 525 | if (error != 0) |
ebe7e575 BB |
526 | goto out; |
527 | ||
528 | /* | |
529 | * Cannot move snapshots out of the snapdir. | |
530 | */ | |
531 | if (sdip != tdip) { | |
532 | error = EINVAL; | |
533 | goto out; | |
534 | } | |
535 | ||
536 | /* | |
537 | * No-op when names are identical. | |
538 | */ | |
13fe0198 | 539 | if (strcmp(snm, tnm) == 0) { |
ebe7e575 BB |
540 | error = 0; |
541 | goto out; | |
542 | } | |
543 | ||
544 | mutex_enter(&zsb->z_ctldir_lock); | |
545 | ||
13fe0198 | 546 | error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); |
ebe7e575 BB |
547 | if (error) |
548 | goto out_unlock; | |
549 | ||
13fe0198 | 550 | search.se_name = (char *)snm; |
ebe7e575 BB |
551 | sep = avl_find(&zsb->z_ctldir_snaps, &search, &where); |
552 | if (sep) | |
13fe0198 | 553 | zfsctl_rename_snap(zsb, sep, tnm); |
ebe7e575 BB |
554 | |
555 | out_unlock: | |
556 | mutex_exit(&zsb->z_ctldir_lock); | |
557 | out: | |
558 | kmem_free(from, MAXNAMELEN); | |
559 | kmem_free(to, MAXNAMELEN); | |
560 | kmem_free(real, MAXNAMELEN); | |
13fe0198 | 561 | kmem_free(fsname, MAXNAMELEN); |
ebe7e575 BB |
562 | |
563 | ZFS_EXIT(zsb); | |
564 | ||
565 | return (error); | |
566 | } | |
567 | ||
568 | /* | |
569 | * Removing a directory under '.zfs/snapshot' will automatically trigger | |
570 | * the removal of the snapshot with the given name. | |
571 | */ | |
572 | /* ARGSUSED */ | |
573 | int | |
574 | zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) | |
575 | { | |
576 | zfs_sb_t *zsb = ITOZSB(dip); | |
577 | char *snapname, *real; | |
578 | int error; | |
579 | ||
580 | ZFS_ENTER(zsb); | |
581 | ||
582 | snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
583 | real = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
584 | ||
585 | if (zsb->z_case == ZFS_CASE_INSENSITIVE) { | |
586 | error = dmu_snapshot_realname(zsb->z_os, name, real, | |
587 | MAXNAMELEN, NULL); | |
588 | if (error == 0) { | |
589 | name = real; | |
590 | } else if (error != ENOTSUP) { | |
591 | goto out; | |
592 | } | |
593 | } | |
594 | ||
595 | error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname); | |
13fe0198 | 596 | if (error == 0) |
ebe7e575 | 597 | error = zfs_secpolicy_destroy_perms(snapname, cr); |
13fe0198 | 598 | if (error != 0) |
ebe7e575 BB |
599 | goto out; |
600 | ||
601 | error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE); | |
602 | if ((error == 0) || (error == ENOENT)) | |
13fe0198 | 603 | error = dsl_destroy_snapshot(snapname, B_FALSE); |
ebe7e575 BB |
604 | out: |
605 | kmem_free(snapname, MAXNAMELEN); | |
606 | kmem_free(real, MAXNAMELEN); | |
607 | ||
608 | ZFS_EXIT(zsb); | |
609 | ||
610 | return (error); | |
611 | } | |
612 | ||
613 | /* | |
614 | * Creating a directory under '.zfs/snapshot' will automatically trigger | |
615 | * the creation of a new snapshot with the given name. | |
616 | */ | |
617 | /* ARGSUSED */ | |
618 | int | |
619 | zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, | |
620 | struct inode **ipp, cred_t *cr, int flags) | |
621 | { | |
622 | zfs_sb_t *zsb = ITOZSB(dip); | |
623 | char *dsname; | |
624 | int error; | |
625 | ||
626 | dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
627 | ||
628 | if (snapshot_namecheck(dirname, NULL, NULL) != 0) { | |
629 | error = EILSEQ; | |
630 | goto out; | |
631 | } | |
632 | ||
633 | dmu_objset_name(zsb->z_os, dsname); | |
634 | ||
635 | error = zfs_secpolicy_snapshot_perms(dsname, cr); | |
13fe0198 | 636 | if (error != 0) |
ebe7e575 BB |
637 | goto out; |
638 | ||
639 | if (error == 0) { | |
6f1ffb06 | 640 | error = dmu_objset_snapshot_one(dsname, dirname); |
13fe0198 | 641 | if (error != 0) |
ebe7e575 BB |
642 | goto out; |
643 | ||
644 | error = zfsctl_snapdir_lookup(dip, dirname, ipp, | |
645 | 0, cr, NULL, NULL); | |
646 | } | |
647 | out: | |
648 | kmem_free(dsname, MAXNAMELEN); | |
649 | ||
650 | return (error); | |
651 | } | |
652 | ||
653 | /* | |
654 | * When a .zfs/snapshot/<snapshot> inode is evicted they must be removed | |
655 | * from the snapshot list. This will normally happen as part of the auto | |
656 | * unmount, however in the case of a manual snapshot unmount this will be | |
657 | * the only notification we receive. | |
658 | */ | |
659 | void | |
660 | zfsctl_snapdir_inactive(struct inode *ip) | |
661 | { | |
662 | zfs_sb_t *zsb = ITOZSB(ip); | |
663 | zfs_snapentry_t *sep, *next; | |
664 | ||
665 | mutex_enter(&zsb->z_ctldir_lock); | |
666 | ||
667 | sep = avl_first(&zsb->z_ctldir_snaps); | |
668 | while (sep != NULL) { | |
669 | next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
670 | ||
671 | if (sep->se_inode == ip) { | |
672 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
2ae10319 | 673 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
ebe7e575 BB |
674 | zfsctl_sep_free(sep); |
675 | break; | |
676 | } | |
677 | sep = next; | |
678 | } | |
679 | ||
680 | mutex_exit(&zsb->z_ctldir_lock); | |
681 | } | |
682 | ||
683 | /* | |
684 | * Attempt to unmount a snapshot by making a call to user space. | |
685 | * There is no assurance that this can or will succeed, is just a | |
686 | * best effort. In the case where it does fail, perhaps because | |
687 | * it's in use, the unmount will fail harmlessly. | |
688 | */ | |
689 | #define SET_UNMOUNT_CMD \ | |
690 | "exec 0</dev/null " \ | |
691 | " 1>/dev/null " \ | |
692 | " 2>/dev/null; " \ | |
94a9bb47 | 693 | "umount -t zfs -n %s'%s'" |
ebe7e575 BB |
694 | |
695 | static int | |
696 | __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags) | |
697 | { | |
698 | char *argv[] = { "/bin/sh", "-c", NULL, NULL }; | |
699 | char *envp[] = { NULL }; | |
700 | int error; | |
701 | ||
702 | argv[2] = kmem_asprintf(SET_UNMOUNT_CMD, | |
703 | flags & MNT_FORCE ? "-f " : "", sep->se_path); | |
761394b3 | 704 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
ebe7e575 BB |
705 | strfree(argv[2]); |
706 | ||
707 | /* | |
708 | * The umount system utility will return 256 on error. We must | |
709 | * assume this error is because the file system is busy so it is | |
710 | * converted to the more sensible EBUSY. | |
711 | */ | |
712 | if (error) | |
713 | error = EBUSY; | |
714 | ||
715 | /* | |
716 | * This was the result of a manual unmount, cancel the delayed work | |
717 | * to prevent zfsctl_expire_snapshot() from attempting a unmount. | |
718 | */ | |
719 | if ((error == 0) && !(flags & MNT_EXPIRE)) | |
2ae10319 BB |
720 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
721 | ||
ebe7e575 BB |
722 | |
723 | return (error); | |
724 | } | |
725 | ||
726 | int | |
727 | zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags) | |
728 | { | |
729 | zfs_snapentry_t search; | |
730 | zfs_snapentry_t *sep; | |
731 | int error = 0; | |
732 | ||
733 | mutex_enter(&zsb->z_ctldir_lock); | |
734 | ||
735 | search.se_name = name; | |
736 | sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); | |
737 | if (sep) { | |
738 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
76351672 BB |
739 | mutex_exit(&zsb->z_ctldir_lock); |
740 | ||
ebe7e575 | 741 | error = __zfsctl_unmount_snapshot(sep, flags); |
76351672 BB |
742 | |
743 | mutex_enter(&zsb->z_ctldir_lock); | |
ebe7e575 BB |
744 | if (error == EBUSY) |
745 | avl_add(&zsb->z_ctldir_snaps, sep); | |
746 | else | |
747 | zfsctl_sep_free(sep); | |
748 | } else { | |
749 | error = ENOENT; | |
750 | } | |
751 | ||
752 | mutex_exit(&zsb->z_ctldir_lock); | |
753 | ASSERT3S(error, >=, 0); | |
754 | ||
755 | return (error); | |
756 | } | |
757 | ||
758 | /* | |
759 | * Traverse all mounted snapshots and attempt to unmount them. This | |
760 | * is best effort, on failure EEXIST is returned and count will be set | |
761 | * to the number of file snapshots which could not be unmounted. | |
762 | */ | |
763 | int | |
764 | zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count) | |
765 | { | |
766 | zfs_snapentry_t *sep, *next; | |
767 | int error = 0; | |
768 | ||
769 | *count = 0; | |
770 | ||
771 | ASSERT(zsb->z_ctldir != NULL); | |
772 | mutex_enter(&zsb->z_ctldir_lock); | |
773 | ||
774 | sep = avl_first(&zsb->z_ctldir_snaps); | |
775 | while (sep != NULL) { | |
776 | next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
777 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
76351672 BB |
778 | mutex_exit(&zsb->z_ctldir_lock); |
779 | ||
ebe7e575 | 780 | error = __zfsctl_unmount_snapshot(sep, flags); |
76351672 BB |
781 | |
782 | mutex_enter(&zsb->z_ctldir_lock); | |
ebe7e575 BB |
783 | if (error == EBUSY) { |
784 | avl_add(&zsb->z_ctldir_snaps, sep); | |
785 | (*count)++; | |
786 | } else { | |
787 | zfsctl_sep_free(sep); | |
788 | } | |
789 | ||
790 | sep = next; | |
791 | } | |
792 | ||
793 | mutex_exit(&zsb->z_ctldir_lock); | |
794 | ||
795 | return ((*count > 0) ? EEXIST : 0); | |
796 | } | |
797 | ||
798 | #define SET_MOUNT_CMD \ | |
799 | "exec 0</dev/null " \ | |
800 | " 1>/dev/null " \ | |
801 | " 2>/dev/null; " \ | |
c7dfc086 | 802 | "mount -t zfs -n '%s' '%s'" |
ebe7e575 BB |
803 | |
804 | int | |
805 | zfsctl_mount_snapshot(struct path *path, int flags) | |
806 | { | |
807 | struct dentry *dentry = path->dentry; | |
808 | struct inode *ip = dentry->d_inode; | |
809 | zfs_sb_t *zsb = ITOZSB(ip); | |
810 | char *full_name, *full_path; | |
811 | zfs_snapentry_t *sep; | |
812 | zfs_snapentry_t search; | |
813 | char *argv[] = { "/bin/sh", "-c", NULL, NULL }; | |
814 | char *envp[] = { NULL }; | |
815 | int error; | |
816 | ||
817 | ZFS_ENTER(zsb); | |
818 | ||
819 | full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); | |
820 | full_path = kmem_zalloc(PATH_MAX, KM_SLEEP); | |
821 | ||
822 | error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name); | |
823 | if (error) | |
824 | goto error; | |
825 | ||
826 | error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path); | |
827 | if (error) | |
828 | goto error; | |
829 | ||
830 | /* | |
831 | * Attempt to mount the snapshot from user space. Normally this | |
832 | * would be done using the vfs_kern_mount() function, however that | |
833 | * function is marked GPL-only and cannot be used. On error we | |
834 | * careful to log the real error to the console and return EISDIR | |
835 | * to safely abort the automount. This should be very rare. | |
836 | */ | |
837 | argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path); | |
761394b3 | 838 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
ebe7e575 BB |
839 | strfree(argv[2]); |
840 | if (error) { | |
841 | printk("ZFS: Unable to automount %s at %s: %d\n", | |
842 | full_name, full_path, error); | |
843 | error = EISDIR; | |
844 | goto error; | |
845 | } | |
846 | ||
847 | mutex_enter(&zsb->z_ctldir_lock); | |
848 | ||
849 | /* | |
850 | * Ensure a previous entry does not exist, if it does safely remove | |
851 | * it any cancel the outstanding expiration. This can occur when a | |
852 | * snapshot is manually unmounted and then an automount is triggered. | |
853 | */ | |
854 | search.se_name = full_name; | |
855 | sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); | |
856 | if (sep) { | |
857 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
2ae10319 | 858 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
ebe7e575 BB |
859 | zfsctl_sep_free(sep); |
860 | } | |
861 | ||
862 | sep = zfsctl_sep_alloc(); | |
863 | sep->se_name = full_name; | |
864 | sep->se_path = full_path; | |
865 | sep->se_inode = ip; | |
866 | avl_add(&zsb->z_ctldir_snaps, sep); | |
867 | ||
2ae10319 BB |
868 | sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq, |
869 | zfsctl_expire_snapshot, sep, TQ_SLEEP, | |
870 | ddi_get_lbolt() + zfs_expire_snapshot * HZ); | |
ebe7e575 BB |
871 | |
872 | mutex_exit(&zsb->z_ctldir_lock); | |
873 | error: | |
874 | if (error) { | |
875 | kmem_free(full_name, MAXNAMELEN); | |
876 | kmem_free(full_path, PATH_MAX); | |
877 | } | |
878 | ||
879 | ZFS_EXIT(zsb); | |
880 | ||
881 | return (error); | |
882 | } | |
883 | ||
884 | /* | |
885 | * Check if this super block has a matching objset id. | |
886 | */ | |
887 | static int | |
888 | zfsctl_test_super(struct super_block *sb, void *objsetidp) | |
889 | { | |
890 | zfs_sb_t *zsb = sb->s_fs_info; | |
891 | uint64_t objsetid = *(uint64_t *)objsetidp; | |
892 | ||
893 | return (dmu_objset_id(zsb->z_os) == objsetid); | |
894 | } | |
895 | ||
896 | /* | |
897 | * Prevent a new super block from being allocated if an existing one | |
898 | * could not be located. We only want to preform a lookup operation. | |
899 | */ | |
900 | static int | |
901 | zfsctl_set_super(struct super_block *sb, void *objsetidp) | |
902 | { | |
903 | return (-EEXIST); | |
904 | } | |
905 | ||
906 | int | |
907 | zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp) | |
908 | { | |
909 | zfs_sb_t *zsb = sb->s_fs_info; | |
910 | struct super_block *sbp; | |
911 | zfs_snapentry_t *sep; | |
912 | uint64_t id; | |
913 | int error; | |
914 | ||
915 | ASSERT(zsb->z_ctldir != NULL); | |
916 | ||
917 | mutex_enter(&zsb->z_ctldir_lock); | |
918 | ||
919 | /* | |
920 | * Verify that the snapshot is mounted. | |
921 | */ | |
922 | sep = avl_first(&zsb->z_ctldir_snaps); | |
923 | while (sep != NULL) { | |
6772fb67 | 924 | error = dmu_snapshot_lookup(zsb->z_os, sep->se_name, &id); |
ebe7e575 BB |
925 | if (error) |
926 | goto out; | |
927 | ||
928 | if (id == objsetid) | |
929 | break; | |
930 | ||
931 | sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
932 | } | |
933 | ||
934 | if (sep != NULL) { | |
935 | /* | |
936 | * Lookup the mounted root rather than the covered mount | |
937 | * point. This may fail if the snapshot has just been | |
938 | * unmounted by an unrelated user space process. This | |
939 | * race cannot occur to an expired mount point because | |
940 | * we hold the zsb->z_ctldir_lock to prevent the race. | |
941 | */ | |
3c203610 YS |
942 | sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super, |
943 | zfsctl_set_super, 0, &id); | |
ebe7e575 BB |
944 | if (IS_ERR(sbp)) { |
945 | error = -PTR_ERR(sbp); | |
946 | } else { | |
947 | *zsbp = sbp->s_fs_info; | |
948 | deactivate_super(sbp); | |
949 | } | |
950 | } else { | |
951 | error = EINVAL; | |
952 | } | |
953 | out: | |
954 | mutex_exit(&zsb->z_ctldir_lock); | |
955 | ASSERT3S(error, >=, 0); | |
956 | ||
957 | return (error); | |
958 | } | |
959 | ||
960 | /* ARGSUSED */ | |
961 | int | |
962 | zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, | |
963 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
964 | { | |
965 | zfs_sb_t *zsb = ITOZSB(dip); | |
966 | struct inode *ip; | |
967 | znode_t *dzp; | |
968 | int error; | |
969 | ||
970 | ZFS_ENTER(zsb); | |
971 | ||
972 | if (zsb->z_shares_dir == 0) { | |
973 | ZFS_EXIT(zsb); | |
45ca2d91 | 974 | return (ENOTSUP); |
ebe7e575 BB |
975 | } |
976 | ||
977 | error = zfs_zget(zsb, zsb->z_shares_dir, &dzp); | |
978 | if (error) { | |
979 | ZFS_EXIT(zsb); | |
980 | return (error); | |
981 | } | |
982 | ||
983 | error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); | |
984 | ||
985 | iput(ZTOI(dzp)); | |
986 | ZFS_EXIT(zsb); | |
987 | ||
988 | return (error); | |
989 | } | |
990 | ||
991 | ||
992 | /* | |
993 | * Initialize the various pieces we'll need to create and manipulate .zfs | |
994 | * directories. Currently this is unused but available. | |
995 | */ | |
996 | void | |
997 | zfsctl_init(void) | |
998 | { | |
2ae10319 BB |
999 | zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri, |
1000 | 1, 8, TASKQ_PREPOPULATE); | |
ebe7e575 BB |
1001 | } |
1002 | ||
1003 | /* | |
1004 | * Cleanup the various pieces we needed for .zfs directories. In particular | |
1005 | * ensure the expiry timer is canceled safely. | |
1006 | */ | |
1007 | void | |
1008 | zfsctl_fini(void) | |
1009 | { | |
2ae10319 | 1010 | taskq_destroy(zfs_expire_taskq); |
ebe7e575 BB |
1011 | } |
1012 | ||
1013 | module_param(zfs_expire_snapshot, int, 0644); | |
1014 | MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); |