]>
Commit | Line | Data |
---|---|---|
ebe7e575 BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * | |
23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Copyright (C) 2011 Lawrence Livermore National Security, LLC. | |
25 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
26 | * LLNL-CODE-403049. | |
27 | * Rewritten for Linux by: | |
28 | * Rohan Puri <rohan.puri15@gmail.com> | |
29 | * Brian Behlendorf <behlendorf1@llnl.gov> | |
30 | */ | |
31 | ||
32 | /* | |
33 | * ZFS control directory (a.k.a. ".zfs") | |
34 | * | |
35 | * This directory provides a common location for all ZFS meta-objects. | |
36 | * Currently, this is only the 'snapshot' and 'shares' directory, but this may | |
37 | * expand in the future. The elements are built dynamically, as the hierarchy | |
38 | * does not actually exist on disk. | |
39 | * | |
40 | * For 'snapshot', we don't want to have all snapshots always mounted, because | |
41 | * this would take up a huge amount of space in /etc/mnttab. We have three | |
42 | * types of objects: | |
43 | * | |
44 | * ctldir ------> snapshotdir -------> snapshot | |
45 | * | | |
46 | * | | |
47 | * V | |
48 | * mounted fs | |
49 | * | |
50 | * The 'snapshot' node contains just enough information to lookup '..' and act | |
51 | * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we | |
52 | * perform an automount of the underlying filesystem and return the | |
53 | * corresponding inode. | |
54 | * | |
55 | * All mounts are handled automatically by a user mode helper which invokes | |
56 | * the mount procedure. Unmounts are handled by allowing the mount | |
57 | * point to expire so the kernel may automatically unmount it. | |
58 | * | |
59 | * The '.zfs', '.zfs/snapshot', and all directories created under | |
60 | * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same | |
61 | * zfs_sb_t as the head filesystem (what '.zfs' lives under). | |
62 | * | |
63 | * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths | |
64 | * (ie: snapshots) are complete ZFS filesystems and have their own unique | |
65 | * zfs_sb_t. However, the fsid reported by these mounts will be the same | |
66 | * as that used by the parent zfs_sb_t to make NFS happy. | |
67 | */ | |
68 | ||
69 | #include <sys/types.h> | |
70 | #include <sys/param.h> | |
71 | #include <sys/time.h> | |
72 | #include <sys/systm.h> | |
73 | #include <sys/sysmacros.h> | |
74 | #include <sys/pathname.h> | |
75 | #include <sys/vfs.h> | |
76 | #include <sys/vfs_opreg.h> | |
77 | #include <sys/zfs_ctldir.h> | |
78 | #include <sys/zfs_ioctl.h> | |
79 | #include <sys/zfs_vfsops.h> | |
80 | #include <sys/zfs_vnops.h> | |
81 | #include <sys/stat.h> | |
82 | #include <sys/dmu.h> | |
83 | #include <sys/dsl_deleg.h> | |
84 | #include <sys/mount.h> | |
85 | #include <sys/zpl.h> | |
86 | #include "zfs_namecheck.h" | |
87 | ||
88 | /* | |
89 | * Control Directory Tunables (.zfs) | |
90 | */ | |
91 | int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; | |
92 | ||
2ae10319 BB |
93 | /* |
94 | * Dedicated task queue for unmounting snapshots. | |
95 | */ | |
96 | static taskq_t *zfs_expire_taskq; | |
97 | ||
ebe7e575 BB |
98 | static zfs_snapentry_t * |
99 | zfsctl_sep_alloc(void) | |
100 | { | |
101 | return kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); | |
102 | } | |
103 | ||
104 | void | |
105 | zfsctl_sep_free(zfs_snapentry_t *sep) | |
106 | { | |
107 | kmem_free(sep->se_name, MAXNAMELEN); | |
108 | kmem_free(sep->se_path, PATH_MAX); | |
109 | kmem_free(sep, sizeof (zfs_snapentry_t)); | |
110 | } | |
111 | ||
112 | /* | |
113 | * Attempt to expire an automounted snapshot, unmounts are attempted every | |
114 | * 'zfs_expire_snapshot' seconds until they succeed. The work request is | |
115 | * responsible for rescheduling itself and freeing the zfs_expire_snapshot_t. | |
116 | */ | |
117 | static void | |
118 | zfsctl_expire_snapshot(void *data) | |
119 | { | |
2ae10319 BB |
120 | zfs_snapentry_t *sep = (zfs_snapentry_t *)data; |
121 | zfs_sb_t *zsb = ITOZSB(sep->se_inode); | |
ebe7e575 BB |
122 | int error; |
123 | ||
ebe7e575 BB |
124 | error = zfsctl_unmount_snapshot(zsb, sep->se_name, MNT_EXPIRE); |
125 | if (error == EBUSY) | |
2ae10319 BB |
126 | sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq, |
127 | zfsctl_expire_snapshot, sep, TQ_SLEEP, | |
128 | ddi_get_lbolt() + zfs_expire_snapshot * HZ); | |
ebe7e575 BB |
129 | } |
130 | ||
131 | int | |
132 | snapentry_compare(const void *a, const void *b) | |
133 | { | |
134 | const zfs_snapentry_t *sa = a; | |
135 | const zfs_snapentry_t *sb = b; | |
136 | int ret = strcmp(sa->se_name, sb->se_name); | |
137 | ||
138 | if (ret < 0) | |
139 | return (-1); | |
140 | else if (ret > 0) | |
141 | return (1); | |
142 | else | |
143 | return (0); | |
144 | } | |
145 | ||
146 | boolean_t | |
147 | zfsctl_is_node(struct inode *ip) | |
148 | { | |
149 | return (ITOZ(ip)->z_is_ctldir); | |
150 | } | |
151 | ||
152 | boolean_t | |
153 | zfsctl_is_snapdir(struct inode *ip) | |
154 | { | |
155 | return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); | |
156 | } | |
157 | ||
158 | /* | |
159 | * Allocate a new inode with the passed id and ops. | |
160 | */ | |
161 | static struct inode * | |
162 | zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id, | |
163 | const struct file_operations *fops, const struct inode_operations *ops) | |
164 | { | |
165 | struct timespec now = current_fs_time(zsb->z_sb); | |
166 | struct inode *ip; | |
167 | znode_t *zp; | |
168 | ||
169 | ip = new_inode(zsb->z_sb); | |
170 | if (ip == NULL) | |
171 | return (NULL); | |
172 | ||
173 | zp = ITOZ(ip); | |
174 | ASSERT3P(zp->z_dirlocks, ==, NULL); | |
175 | ASSERT3P(zp->z_acl_cached, ==, NULL); | |
176 | ASSERT3P(zp->z_xattr_cached, ==, NULL); | |
177 | zp->z_id = id; | |
178 | zp->z_unlinked = 0; | |
179 | zp->z_atime_dirty = 0; | |
180 | zp->z_zn_prefetch = 0; | |
181 | zp->z_moved = 0; | |
182 | zp->z_sa_hdl = NULL; | |
183 | zp->z_blksz = 0; | |
184 | zp->z_seq = 0; | |
185 | zp->z_mapcnt = 0; | |
186 | zp->z_gen = 0; | |
187 | zp->z_size = 0; | |
188 | zp->z_atime[0] = 0; | |
189 | zp->z_atime[1] = 0; | |
190 | zp->z_links = 0; | |
191 | zp->z_pflags = 0; | |
192 | zp->z_uid = 0; | |
193 | zp->z_gid = 0; | |
194 | zp->z_mode = 0; | |
195 | zp->z_sync_cnt = 0; | |
196 | zp->z_is_zvol = B_FALSE; | |
197 | zp->z_is_mapped = B_FALSE; | |
198 | zp->z_is_ctldir = B_TRUE; | |
199 | zp->z_is_sa = B_FALSE; | |
7b3e34ba | 200 | zp->z_is_stale = B_FALSE; |
ebe7e575 BB |
201 | ip->i_ino = id; |
202 | ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO); | |
570d6edf RY |
203 | ip->i_uid = SUID_TO_KUID(0); |
204 | ip->i_gid = SGID_TO_KGID(0); | |
ebe7e575 BB |
205 | ip->i_blkbits = SPA_MINBLOCKSHIFT; |
206 | ip->i_atime = now; | |
207 | ip->i_mtime = now; | |
208 | ip->i_ctime = now; | |
209 | ip->i_fop = fops; | |
210 | ip->i_op = ops; | |
211 | ||
212 | if (insert_inode_locked(ip)) { | |
213 | unlock_new_inode(ip); | |
214 | iput(ip); | |
215 | return (NULL); | |
216 | } | |
217 | ||
218 | mutex_enter(&zsb->z_znodes_lock); | |
219 | list_insert_tail(&zsb->z_all_znodes, zp); | |
9ed86e7c | 220 | zsb->z_nr_znodes++; |
ebe7e575 BB |
221 | membar_producer(); |
222 | mutex_exit(&zsb->z_znodes_lock); | |
223 | ||
224 | unlock_new_inode(ip); | |
225 | ||
226 | return (ip); | |
227 | } | |
228 | ||
229 | /* | |
230 | * Lookup the inode with given id, it will be allocated if needed. | |
231 | */ | |
232 | static struct inode * | |
fc173c85 | 233 | zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id, |
ebe7e575 BB |
234 | const struct file_operations *fops, const struct inode_operations *ops) |
235 | { | |
236 | struct inode *ip = NULL; | |
237 | ||
238 | while (ip == NULL) { | |
fc173c85 | 239 | ip = ilookup(zsb->z_sb, (unsigned long)id); |
ebe7e575 BB |
240 | if (ip) |
241 | break; | |
242 | ||
243 | /* May fail due to concurrent zfsctl_inode_alloc() */ | |
244 | ip = zfsctl_inode_alloc(zsb, id, fops, ops); | |
245 | } | |
246 | ||
247 | return (ip); | |
248 | } | |
249 | ||
/*
 * Hook for releasing zfsctl specific inode state.  There is currently
 * nothing to release, so this is intentionally a no-op.
 */
void
zfsctl_inode_destroy(struct inode *ip)
{
	/* Nothing to free. */
}
258 | ||
/*
 * An inode is being evicted from the cache.  Only inodes recognized by
 * zfsctl_is_snapdir() need additional handling.
 */
void
zfsctl_inode_inactive(struct inode *ip)
{
	if (!zfsctl_is_snapdir(ip))
		return;

	zfsctl_snapdir_inactive(ip);
}
268 | ||
269 | /* | |
270 | * Create the '.zfs' directory. This directory is cached as part of the VFS | |
271 | * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount() | |
272 | * therefore checks against a vfs_count of 2 instead of 1. This reference | |
273 | * is removed when the ctldir is destroyed in the unmount. All other entities | |
274 | * under the '.zfs' directory are created dynamically as needed. | |
fc173c85 BB |
275 | * |
276 | * Because the dynamically created '.zfs' directory entries assume the use | |
277 | * of 64-bit inode numbers this support must be disabled on 32-bit systems. | |
ebe7e575 BB |
278 | */ |
279 | int | |
280 | zfsctl_create(zfs_sb_t *zsb) | |
281 | { | |
fc173c85 | 282 | #if defined(CONFIG_64BIT) |
ebe7e575 BB |
283 | ASSERT(zsb->z_ctldir == NULL); |
284 | ||
285 | zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT, | |
286 | &zpl_fops_root, &zpl_ops_root); | |
287 | if (zsb->z_ctldir == NULL) | |
288 | return (ENOENT); | |
289 | ||
290 | return (0); | |
fc173c85 BB |
291 | #else |
292 | return (EOPNOTSUPP); | |
293 | #endif /* CONFIG_64BIT */ | |
ebe7e575 BB |
294 | } |
295 | ||
296 | /* | |
297 | * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. | |
298 | */ | |
299 | void | |
300 | zfsctl_destroy(zfs_sb_t *zsb) | |
301 | { | |
302 | iput(zsb->z_ctldir); | |
303 | zsb->z_ctldir = NULL; | |
304 | } | |
305 | ||
306 | /* | |
307 | * Given a root znode, retrieve the associated .zfs directory. | |
308 | * Add a hold to the vnode and return it. | |
309 | */ | |
310 | struct inode * | |
311 | zfsctl_root(znode_t *zp) | |
312 | { | |
313 | ASSERT(zfs_has_ctldir(zp)); | |
314 | igrab(ZTOZSB(zp)->z_ctldir); | |
315 | return (ZTOZSB(zp)->z_ctldir); | |
316 | } | |
317 | ||
318 | /*ARGSUSED*/ | |
319 | int | |
320 | zfsctl_fid(struct inode *ip, fid_t *fidp) | |
321 | { | |
322 | znode_t *zp = ITOZ(ip); | |
323 | zfs_sb_t *zsb = ITOZSB(ip); | |
324 | uint64_t object = zp->z_id; | |
325 | zfid_short_t *zfid; | |
326 | int i; | |
327 | ||
328 | ZFS_ENTER(zsb); | |
329 | ||
330 | if (fidp->fid_len < SHORT_FID_LEN) { | |
331 | fidp->fid_len = SHORT_FID_LEN; | |
332 | ZFS_EXIT(zsb); | |
333 | return (ENOSPC); | |
334 | } | |
335 | ||
336 | zfid = (zfid_short_t *)fidp; | |
337 | ||
338 | zfid->zf_len = SHORT_FID_LEN; | |
339 | ||
340 | for (i = 0; i < sizeof (zfid->zf_object); i++) | |
341 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
342 | ||
343 | /* .zfs znodes always have a generation number of 0 */ | |
344 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
345 | zfid->zf_gen[i] = 0; | |
346 | ||
347 | ZFS_EXIT(zsb); | |
348 | return (0); | |
349 | } | |
350 | ||
351 | static int | |
352 | zfsctl_snapshot_zname(struct inode *ip, const char *name, int len, char *zname) | |
353 | { | |
354 | objset_t *os = ITOZSB(ip)->z_os; | |
355 | ||
356 | if (snapshot_namecheck(name, NULL, NULL) != 0) | |
357 | return (EILSEQ); | |
358 | ||
359 | dmu_objset_name(os, zname); | |
360 | if ((strlen(zname) + 1 + strlen(name)) >= len) | |
361 | return (ENAMETOOLONG); | |
362 | ||
363 | (void) strcat(zname, "@"); | |
364 | (void) strcat(zname, name); | |
365 | ||
366 | return (0); | |
367 | } | |
368 | ||
369 | static int | |
370 | zfsctl_snapshot_zpath(struct path *path, int len, char *zpath) | |
371 | { | |
372 | char *path_buffer, *path_ptr; | |
373 | int path_len, error = 0; | |
374 | ||
375 | path_buffer = kmem_alloc(len, KM_SLEEP); | |
376 | ||
377 | path_ptr = d_path(path, path_buffer, len); | |
378 | if (IS_ERR(path_ptr)) { | |
379 | error = -PTR_ERR(path_ptr); | |
380 | goto out; | |
381 | } | |
382 | ||
383 | path_len = path_buffer + len - 1 - path_ptr; | |
384 | if (path_len > len) { | |
385 | error = EFAULT; | |
386 | goto out; | |
387 | } | |
388 | ||
389 | memcpy(zpath, path_ptr, path_len); | |
390 | zpath[path_len] = '\0'; | |
391 | out: | |
392 | kmem_free(path_buffer, len); | |
393 | ||
394 | return (error); | |
395 | } | |
396 | ||
397 | /* | |
398 | * Special case the handling of "..". | |
399 | */ | |
400 | /* ARGSUSED */ | |
401 | int | |
402 | zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp, | |
403 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
404 | { | |
405 | zfs_sb_t *zsb = ITOZSB(dip); | |
406 | int error = 0; | |
407 | ||
408 | ZFS_ENTER(zsb); | |
409 | ||
410 | if (strcmp(name, "..") == 0) { | |
411 | *ipp = dip->i_sb->s_root->d_inode; | |
412 | } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { | |
413 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR, | |
414 | &zpl_fops_snapdir, &zpl_ops_snapdir); | |
415 | } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { | |
416 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES, | |
417 | &zpl_fops_shares, &zpl_ops_shares); | |
418 | } else { | |
419 | *ipp = NULL; | |
420 | } | |
421 | ||
422 | if (*ipp == NULL) | |
423 | error = ENOENT; | |
424 | ||
425 | ZFS_EXIT(zsb); | |
426 | ||
427 | return (error); | |
428 | } | |
429 | ||
430 | /* | |
431 | * Lookup entry point for the 'snapshot' directory. Try to open the | |
432 | * snapshot if it exist, creating the pseudo filesystem inode as necessary. | |
433 | * Perform a mount of the associated dataset on top of the inode. | |
434 | */ | |
435 | /* ARGSUSED */ | |
436 | int | |
437 | zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp, | |
438 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
439 | { | |
440 | zfs_sb_t *zsb = ITOZSB(dip); | |
441 | uint64_t id; | |
442 | int error; | |
443 | ||
444 | ZFS_ENTER(zsb); | |
445 | ||
6772fb67 | 446 | error = dmu_snapshot_lookup(zsb->z_os, name, &id); |
ebe7e575 BB |
447 | if (error) { |
448 | ZFS_EXIT(zsb); | |
449 | return (error); | |
450 | } | |
451 | ||
452 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id, | |
453 | &simple_dir_operations, &simple_dir_inode_operations); | |
454 | if (*ipp) { | |
455 | #ifdef HAVE_AUTOMOUNT | |
456 | (*ipp)->i_flags |= S_AUTOMOUNT; | |
457 | #endif /* HAVE_AUTOMOUNT */ | |
458 | } else { | |
459 | error = ENOENT; | |
460 | } | |
461 | ||
462 | ZFS_EXIT(zsb); | |
463 | ||
464 | return (error); | |
465 | } | |
466 | ||
467 | static void | |
468 | zfsctl_rename_snap(zfs_sb_t *zsb, zfs_snapentry_t *sep, const char *name) | |
469 | { | |
470 | avl_index_t where; | |
471 | ||
472 | ASSERT(MUTEX_HELD(&zsb->z_ctldir_lock)); | |
473 | ASSERT(sep != NULL); | |
474 | ||
475 | /* | |
476 | * Change the name in the AVL tree. | |
477 | */ | |
478 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
479 | (void) strcpy(sep->se_name, name); | |
480 | VERIFY(avl_find(&zsb->z_ctldir_snaps, sep, &where) == NULL); | |
481 | avl_insert(&zsb->z_ctldir_snaps, sep, where); | |
482 | } | |
483 | ||
484 | /* | |
485 | * Renaming a directory under '.zfs/snapshot' will automatically trigger | |
486 | * a rename of the snapshot to the new given name. The rename is confined | |
487 | * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. | |
488 | */ | |
489 | /*ARGSUSED*/ | |
490 | int | |
491 | zfsctl_snapdir_rename(struct inode *sdip, char *sname, | |
492 | struct inode *tdip, char *tname, cred_t *cr, int flags) | |
493 | { | |
494 | zfs_sb_t *zsb = ITOZSB(sdip); | |
495 | zfs_snapentry_t search, *sep; | |
496 | avl_index_t where; | |
497 | char *to, *from, *real; | |
498 | int error; | |
499 | ||
500 | ZFS_ENTER(zsb); | |
501 | ||
502 | to = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
503 | from = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
504 | real = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
505 | ||
506 | if (zsb->z_case == ZFS_CASE_INSENSITIVE) { | |
507 | error = dmu_snapshot_realname(zsb->z_os, sname, real, | |
508 | MAXNAMELEN, NULL); | |
509 | if (error == 0) { | |
510 | sname = real; | |
511 | } else if (error != ENOTSUP) { | |
512 | goto out; | |
513 | } | |
514 | } | |
515 | ||
516 | error = zfsctl_snapshot_zname(sdip, sname, MAXNAMELEN, from); | |
517 | if (!error) | |
518 | error = zfsctl_snapshot_zname(tdip, tname, MAXNAMELEN, to); | |
519 | if (!error) | |
520 | error = zfs_secpolicy_rename_perms(from, to, cr); | |
521 | if (error) | |
522 | goto out; | |
523 | ||
524 | /* | |
525 | * Cannot move snapshots out of the snapdir. | |
526 | */ | |
527 | if (sdip != tdip) { | |
528 | error = EINVAL; | |
529 | goto out; | |
530 | } | |
531 | ||
532 | /* | |
533 | * No-op when names are identical. | |
534 | */ | |
535 | if (strcmp(sname, tname) == 0) { | |
536 | error = 0; | |
537 | goto out; | |
538 | } | |
539 | ||
540 | mutex_enter(&zsb->z_ctldir_lock); | |
541 | ||
542 | error = dmu_objset_rename(from, to, B_FALSE); | |
543 | if (error) | |
544 | goto out_unlock; | |
545 | ||
546 | search.se_name = (char *)sname; | |
547 | sep = avl_find(&zsb->z_ctldir_snaps, &search, &where); | |
548 | if (sep) | |
549 | zfsctl_rename_snap(zsb, sep, tname); | |
550 | ||
551 | out_unlock: | |
552 | mutex_exit(&zsb->z_ctldir_lock); | |
553 | out: | |
554 | kmem_free(from, MAXNAMELEN); | |
555 | kmem_free(to, MAXNAMELEN); | |
556 | kmem_free(real, MAXNAMELEN); | |
557 | ||
558 | ZFS_EXIT(zsb); | |
559 | ||
560 | return (error); | |
561 | } | |
562 | ||
563 | /* | |
564 | * Removing a directory under '.zfs/snapshot' will automatically trigger | |
565 | * the removal of the snapshot with the given name. | |
566 | */ | |
567 | /* ARGSUSED */ | |
568 | int | |
569 | zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) | |
570 | { | |
571 | zfs_sb_t *zsb = ITOZSB(dip); | |
572 | char *snapname, *real; | |
573 | int error; | |
574 | ||
575 | ZFS_ENTER(zsb); | |
576 | ||
577 | snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
578 | real = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
579 | ||
580 | if (zsb->z_case == ZFS_CASE_INSENSITIVE) { | |
581 | error = dmu_snapshot_realname(zsb->z_os, name, real, | |
582 | MAXNAMELEN, NULL); | |
583 | if (error == 0) { | |
584 | name = real; | |
585 | } else if (error != ENOTSUP) { | |
586 | goto out; | |
587 | } | |
588 | } | |
589 | ||
590 | error = zfsctl_snapshot_zname(dip, name, MAXNAMELEN, snapname); | |
591 | if (!error) | |
592 | error = zfs_secpolicy_destroy_perms(snapname, cr); | |
593 | if (error) | |
594 | goto out; | |
595 | ||
596 | error = zfsctl_unmount_snapshot(zsb, name, MNT_FORCE); | |
597 | if ((error == 0) || (error == ENOENT)) | |
598 | error = dmu_objset_destroy(snapname, B_FALSE); | |
599 | out: | |
600 | kmem_free(snapname, MAXNAMELEN); | |
601 | kmem_free(real, MAXNAMELEN); | |
602 | ||
603 | ZFS_EXIT(zsb); | |
604 | ||
605 | return (error); | |
606 | } | |
607 | ||
608 | /* | |
609 | * Creating a directory under '.zfs/snapshot' will automatically trigger | |
610 | * the creation of a new snapshot with the given name. | |
611 | */ | |
612 | /* ARGSUSED */ | |
613 | int | |
614 | zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, | |
615 | struct inode **ipp, cred_t *cr, int flags) | |
616 | { | |
617 | zfs_sb_t *zsb = ITOZSB(dip); | |
618 | char *dsname; | |
619 | int error; | |
620 | ||
621 | dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
622 | ||
623 | if (snapshot_namecheck(dirname, NULL, NULL) != 0) { | |
624 | error = EILSEQ; | |
625 | goto out; | |
626 | } | |
627 | ||
628 | dmu_objset_name(zsb->z_os, dsname); | |
629 | ||
630 | error = zfs_secpolicy_snapshot_perms(dsname, cr); | |
631 | if (error) | |
632 | goto out; | |
633 | ||
634 | if (error == 0) { | |
635 | error = dmu_objset_snapshot(dsname, dirname, | |
636 | NULL, NULL, B_FALSE, B_FALSE, -1); | |
637 | if (error) | |
638 | goto out; | |
639 | ||
640 | error = zfsctl_snapdir_lookup(dip, dirname, ipp, | |
641 | 0, cr, NULL, NULL); | |
642 | } | |
643 | out: | |
644 | kmem_free(dsname, MAXNAMELEN); | |
645 | ||
646 | return (error); | |
647 | } | |
648 | ||
649 | /* | |
650 | * When a .zfs/snapshot/<snapshot> inode is evicted they must be removed | |
651 | * from the snapshot list. This will normally happen as part of the auto | |
652 | * unmount, however in the case of a manual snapshot unmount this will be | |
653 | * the only notification we receive. | |
654 | */ | |
655 | void | |
656 | zfsctl_snapdir_inactive(struct inode *ip) | |
657 | { | |
658 | zfs_sb_t *zsb = ITOZSB(ip); | |
659 | zfs_snapentry_t *sep, *next; | |
660 | ||
661 | mutex_enter(&zsb->z_ctldir_lock); | |
662 | ||
663 | sep = avl_first(&zsb->z_ctldir_snaps); | |
664 | while (sep != NULL) { | |
665 | next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
666 | ||
667 | if (sep->se_inode == ip) { | |
668 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
2ae10319 | 669 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
ebe7e575 BB |
670 | zfsctl_sep_free(sep); |
671 | break; | |
672 | } | |
673 | sep = next; | |
674 | } | |
675 | ||
676 | mutex_exit(&zsb->z_ctldir_lock); | |
677 | } | |
678 | ||
679 | /* | |
680 | * Attempt to unmount a snapshot by making a call to user space. | |
681 | * There is no assurance that this can or will succeed, is just a | |
682 | * best effort. In the case where it does fail, perhaps because | |
683 | * it's in use, the unmount will fail harmlessly. | |
684 | */ | |
685 | #define SET_UNMOUNT_CMD \ | |
686 | "exec 0</dev/null " \ | |
687 | " 1>/dev/null " \ | |
688 | " 2>/dev/null; " \ | |
94a9bb47 | 689 | "umount -t zfs -n %s'%s'" |
ebe7e575 BB |
690 | |
691 | static int | |
692 | __zfsctl_unmount_snapshot(zfs_snapentry_t *sep, int flags) | |
693 | { | |
694 | char *argv[] = { "/bin/sh", "-c", NULL, NULL }; | |
695 | char *envp[] = { NULL }; | |
696 | int error; | |
697 | ||
698 | argv[2] = kmem_asprintf(SET_UNMOUNT_CMD, | |
699 | flags & MNT_FORCE ? "-f " : "", sep->se_path); | |
761394b3 | 700 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
ebe7e575 BB |
701 | strfree(argv[2]); |
702 | ||
703 | /* | |
704 | * The umount system utility will return 256 on error. We must | |
705 | * assume this error is because the file system is busy so it is | |
706 | * converted to the more sensible EBUSY. | |
707 | */ | |
708 | if (error) | |
709 | error = EBUSY; | |
710 | ||
711 | /* | |
712 | * This was the result of a manual unmount, cancel the delayed work | |
713 | * to prevent zfsctl_expire_snapshot() from attempting a unmount. | |
714 | */ | |
715 | if ((error == 0) && !(flags & MNT_EXPIRE)) | |
2ae10319 BB |
716 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
717 | ||
ebe7e575 BB |
718 | |
719 | return (error); | |
720 | } | |
721 | ||
722 | int | |
723 | zfsctl_unmount_snapshot(zfs_sb_t *zsb, char *name, int flags) | |
724 | { | |
725 | zfs_snapentry_t search; | |
726 | zfs_snapentry_t *sep; | |
727 | int error = 0; | |
728 | ||
729 | mutex_enter(&zsb->z_ctldir_lock); | |
730 | ||
731 | search.se_name = name; | |
732 | sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); | |
733 | if (sep) { | |
734 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
76351672 BB |
735 | mutex_exit(&zsb->z_ctldir_lock); |
736 | ||
ebe7e575 | 737 | error = __zfsctl_unmount_snapshot(sep, flags); |
76351672 BB |
738 | |
739 | mutex_enter(&zsb->z_ctldir_lock); | |
ebe7e575 BB |
740 | if (error == EBUSY) |
741 | avl_add(&zsb->z_ctldir_snaps, sep); | |
742 | else | |
743 | zfsctl_sep_free(sep); | |
744 | } else { | |
745 | error = ENOENT; | |
746 | } | |
747 | ||
748 | mutex_exit(&zsb->z_ctldir_lock); | |
749 | ASSERT3S(error, >=, 0); | |
750 | ||
751 | return (error); | |
752 | } | |
753 | ||
754 | /* | |
755 | * Traverse all mounted snapshots and attempt to unmount them. This | |
756 | * is best effort, on failure EEXIST is returned and count will be set | |
757 | * to the number of file snapshots which could not be unmounted. | |
758 | */ | |
759 | int | |
760 | zfsctl_unmount_snapshots(zfs_sb_t *zsb, int flags, int *count) | |
761 | { | |
762 | zfs_snapentry_t *sep, *next; | |
763 | int error = 0; | |
764 | ||
765 | *count = 0; | |
766 | ||
767 | ASSERT(zsb->z_ctldir != NULL); | |
768 | mutex_enter(&zsb->z_ctldir_lock); | |
769 | ||
770 | sep = avl_first(&zsb->z_ctldir_snaps); | |
771 | while (sep != NULL) { | |
772 | next = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
773 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
76351672 BB |
774 | mutex_exit(&zsb->z_ctldir_lock); |
775 | ||
ebe7e575 | 776 | error = __zfsctl_unmount_snapshot(sep, flags); |
76351672 BB |
777 | |
778 | mutex_enter(&zsb->z_ctldir_lock); | |
ebe7e575 BB |
779 | if (error == EBUSY) { |
780 | avl_add(&zsb->z_ctldir_snaps, sep); | |
781 | (*count)++; | |
782 | } else { | |
783 | zfsctl_sep_free(sep); | |
784 | } | |
785 | ||
786 | sep = next; | |
787 | } | |
788 | ||
789 | mutex_exit(&zsb->z_ctldir_lock); | |
790 | ||
791 | return ((*count > 0) ? EEXIST : 0); | |
792 | } | |
793 | ||
794 | #define SET_MOUNT_CMD \ | |
795 | "exec 0</dev/null " \ | |
796 | " 1>/dev/null " \ | |
797 | " 2>/dev/null; " \ | |
c7dfc086 | 798 | "mount -t zfs -n '%s' '%s'" |
ebe7e575 BB |
799 | |
800 | int | |
801 | zfsctl_mount_snapshot(struct path *path, int flags) | |
802 | { | |
803 | struct dentry *dentry = path->dentry; | |
804 | struct inode *ip = dentry->d_inode; | |
805 | zfs_sb_t *zsb = ITOZSB(ip); | |
806 | char *full_name, *full_path; | |
807 | zfs_snapentry_t *sep; | |
808 | zfs_snapentry_t search; | |
809 | char *argv[] = { "/bin/sh", "-c", NULL, NULL }; | |
810 | char *envp[] = { NULL }; | |
811 | int error; | |
812 | ||
813 | ZFS_ENTER(zsb); | |
814 | ||
815 | full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); | |
816 | full_path = kmem_zalloc(PATH_MAX, KM_SLEEP); | |
817 | ||
818 | error = zfsctl_snapshot_zname(ip, dname(dentry), MAXNAMELEN, full_name); | |
819 | if (error) | |
820 | goto error; | |
821 | ||
822 | error = zfsctl_snapshot_zpath(path, PATH_MAX, full_path); | |
823 | if (error) | |
824 | goto error; | |
825 | ||
826 | /* | |
827 | * Attempt to mount the snapshot from user space. Normally this | |
828 | * would be done using the vfs_kern_mount() function, however that | |
829 | * function is marked GPL-only and cannot be used. On error we | |
830 | * careful to log the real error to the console and return EISDIR | |
831 | * to safely abort the automount. This should be very rare. | |
832 | */ | |
833 | argv[2] = kmem_asprintf(SET_MOUNT_CMD, full_name, full_path); | |
761394b3 | 834 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
ebe7e575 BB |
835 | strfree(argv[2]); |
836 | if (error) { | |
837 | printk("ZFS: Unable to automount %s at %s: %d\n", | |
838 | full_name, full_path, error); | |
839 | error = EISDIR; | |
840 | goto error; | |
841 | } | |
842 | ||
843 | mutex_enter(&zsb->z_ctldir_lock); | |
844 | ||
845 | /* | |
846 | * Ensure a previous entry does not exist, if it does safely remove | |
847 | * it any cancel the outstanding expiration. This can occur when a | |
848 | * snapshot is manually unmounted and then an automount is triggered. | |
849 | */ | |
850 | search.se_name = full_name; | |
851 | sep = avl_find(&zsb->z_ctldir_snaps, &search, NULL); | |
852 | if (sep) { | |
853 | avl_remove(&zsb->z_ctldir_snaps, sep); | |
2ae10319 | 854 | taskq_cancel_id(zfs_expire_taskq, sep->se_taskqid); |
ebe7e575 BB |
855 | zfsctl_sep_free(sep); |
856 | } | |
857 | ||
858 | sep = zfsctl_sep_alloc(); | |
859 | sep->se_name = full_name; | |
860 | sep->se_path = full_path; | |
861 | sep->se_inode = ip; | |
862 | avl_add(&zsb->z_ctldir_snaps, sep); | |
863 | ||
2ae10319 BB |
864 | sep->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq, |
865 | zfsctl_expire_snapshot, sep, TQ_SLEEP, | |
866 | ddi_get_lbolt() + zfs_expire_snapshot * HZ); | |
ebe7e575 BB |
867 | |
868 | mutex_exit(&zsb->z_ctldir_lock); | |
869 | error: | |
870 | if (error) { | |
871 | kmem_free(full_name, MAXNAMELEN); | |
872 | kmem_free(full_path, PATH_MAX); | |
873 | } | |
874 | ||
875 | ZFS_EXIT(zsb); | |
876 | ||
877 | return (error); | |
878 | } | |
879 | ||
880 | /* | |
881 | * Check if this super block has a matching objset id. | |
882 | */ | |
883 | static int | |
884 | zfsctl_test_super(struct super_block *sb, void *objsetidp) | |
885 | { | |
886 | zfs_sb_t *zsb = sb->s_fs_info; | |
887 | uint64_t objsetid = *(uint64_t *)objsetidp; | |
888 | ||
889 | return (dmu_objset_id(zsb->z_os) == objsetid); | |
890 | } | |
891 | ||
/*
 * Super block set callback which always fails with -EEXIST.  This
 * prevents a new super block from being allocated if an existing one
 * could not be located; we only want to perform a lookup operation.
 */
static int
zfsctl_set_super(struct super_block *sb, void *objsetidp)
{
	const int refuse = -EEXIST;

	return (refuse);
}
901 | ||
902 | int | |
903 | zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp) | |
904 | { | |
905 | zfs_sb_t *zsb = sb->s_fs_info; | |
906 | struct super_block *sbp; | |
907 | zfs_snapentry_t *sep; | |
908 | uint64_t id; | |
909 | int error; | |
910 | ||
911 | ASSERT(zsb->z_ctldir != NULL); | |
912 | ||
913 | mutex_enter(&zsb->z_ctldir_lock); | |
914 | ||
915 | /* | |
916 | * Verify that the snapshot is mounted. | |
917 | */ | |
918 | sep = avl_first(&zsb->z_ctldir_snaps); | |
919 | while (sep != NULL) { | |
6772fb67 | 920 | error = dmu_snapshot_lookup(zsb->z_os, sep->se_name, &id); |
ebe7e575 BB |
921 | if (error) |
922 | goto out; | |
923 | ||
924 | if (id == objsetid) | |
925 | break; | |
926 | ||
927 | sep = AVL_NEXT(&zsb->z_ctldir_snaps, sep); | |
928 | } | |
929 | ||
930 | if (sep != NULL) { | |
931 | /* | |
932 | * Lookup the mounted root rather than the covered mount | |
933 | * point. This may fail if the snapshot has just been | |
934 | * unmounted by an unrelated user space process. This | |
935 | * race cannot occur to an expired mount point because | |
936 | * we hold the zsb->z_ctldir_lock to prevent the race. | |
937 | */ | |
3c203610 YS |
938 | sbp = zpl_sget(&zpl_fs_type, zfsctl_test_super, |
939 | zfsctl_set_super, 0, &id); | |
ebe7e575 BB |
940 | if (IS_ERR(sbp)) { |
941 | error = -PTR_ERR(sbp); | |
942 | } else { | |
943 | *zsbp = sbp->s_fs_info; | |
944 | deactivate_super(sbp); | |
945 | } | |
946 | } else { | |
947 | error = EINVAL; | |
948 | } | |
949 | out: | |
950 | mutex_exit(&zsb->z_ctldir_lock); | |
951 | ASSERT3S(error, >=, 0); | |
952 | ||
953 | return (error); | |
954 | } | |
955 | ||
956 | /* ARGSUSED */ | |
957 | int | |
958 | zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, | |
959 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
960 | { | |
961 | zfs_sb_t *zsb = ITOZSB(dip); | |
962 | struct inode *ip; | |
963 | znode_t *dzp; | |
964 | int error; | |
965 | ||
966 | ZFS_ENTER(zsb); | |
967 | ||
968 | if (zsb->z_shares_dir == 0) { | |
969 | ZFS_EXIT(zsb); | |
45ca2d91 | 970 | return (ENOTSUP); |
ebe7e575 BB |
971 | } |
972 | ||
973 | error = zfs_zget(zsb, zsb->z_shares_dir, &dzp); | |
974 | if (error) { | |
975 | ZFS_EXIT(zsb); | |
976 | return (error); | |
977 | } | |
978 | ||
979 | error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); | |
980 | ||
981 | iput(ZTOI(dzp)); | |
982 | ZFS_EXIT(zsb); | |
983 | ||
984 | return (error); | |
985 | } | |
986 | ||
987 | ||
988 | /* | |
989 | * Initialize the various pieces we'll need to create and manipulate .zfs | |
990 | * directories. Currently this is unused but available. | |
991 | */ | |
992 | void | |
993 | zfsctl_init(void) | |
994 | { | |
2ae10319 BB |
995 | zfs_expire_taskq = taskq_create("z_unmount", 1, maxclsyspri, |
996 | 1, 8, TASKQ_PREPOPULATE); | |
ebe7e575 BB |
997 | } |
998 | ||
999 | /* | |
1000 | * Cleanup the various pieces we needed for .zfs directories. In particular | |
1001 | * ensure the expiry timer is canceled safely. | |
1002 | */ | |
1003 | void | |
1004 | zfsctl_fini(void) | |
1005 | { | |
2ae10319 | 1006 | taskq_destroy(zfs_expire_taskq); |
ebe7e575 BB |
1007 | } |
1008 | ||
1009 | module_param(zfs_expire_snapshot, int, 0644); | |
1010 | MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); |