]>
Commit | Line | Data |
---|---|---|
87d546d8 TG |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * | |
23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Copyright (C) 2011 Lawrence Livermore National Security, LLC. | |
25 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
26 | * LLNL-CODE-403049. | |
27 | * Rewritten for Linux by: | |
28 | * Rohan Puri <rohan.puri15@gmail.com> | |
29 | * Brian Behlendorf <behlendorf1@llnl.gov> | |
30 | * Copyright (c) 2013 by Delphix. All rights reserved. | |
31 | */ | |
32 | ||
33 | /* | |
34 | * ZFS control directory (a.k.a. ".zfs") | |
35 | * | |
36 | * This directory provides a common location for all ZFS meta-objects. | |
37 | * Currently, this is only the 'snapshot' and 'shares' directory, but this may | |
38 | * expand in the future. The elements are built dynamically, as the hierarchy | |
39 | * does not actually exist on disk. | |
40 | * | |
41 | * For 'snapshot', we don't want to have all snapshots always mounted, because | |
42 | * this would take up a huge amount of space in /etc/mnttab. We have three | |
43 | * types of objects: | |
44 | * | |
45 | * ctldir ------> snapshotdir -------> snapshot | |
46 | * | | |
47 | * | | |
48 | * V | |
49 | * mounted fs | |
50 | * | |
51 | * The 'snapshot' node contains just enough information to lookup '..' and act | |
52 | * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we | |
53 | * perform an automount of the underlying filesystem and return the | |
54 | * corresponding inode. | |
55 | * | |
56 | * All mounts are handled automatically by a user mode helper which invokes | |
57 | * the mount procedure. Unmounts are handled by allowing the mount | |
58 | * point to expire so the kernel may automatically unmount it. | |
59 | * | |
60 | * The '.zfs', '.zfs/snapshot', and all directories created under | |
61 | * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same | |
62 | * zfs_sb_t as the head filesystem (what '.zfs' lives under). | |
63 | * | |
64 | * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths | |
65 | * (ie: snapshots) are complete ZFS filesystems and have their own unique | |
66 | * zfs_sb_t. However, the fsid reported by these mounts will be the same | |
67 | * as that used by the parent zfs_sb_t to make NFS happy. | |
68 | */ | |
69 | ||
70 | #include <sys/types.h> | |
71 | #include <sys/param.h> | |
72 | #include <sys/time.h> | |
73 | #include <sys/systm.h> | |
74 | #include <sys/sysmacros.h> | |
75 | #include <sys/pathname.h> | |
76 | #include <sys/vfs.h> | |
77 | #include <sys/vfs_opreg.h> | |
78 | #include <sys/zfs_ctldir.h> | |
79 | #include <sys/zfs_ioctl.h> | |
80 | #include <sys/zfs_vfsops.h> | |
81 | #include <sys/zfs_vnops.h> | |
82 | #include <sys/stat.h> | |
83 | #include <sys/dmu.h> | |
84 | #include <sys/dmu_objset.h> | |
85 | #include <sys/dsl_destroy.h> | |
86 | #include <sys/dsl_deleg.h> | |
87 | #include <sys/mount.h> | |
88 | #include <sys/zpl.h> | |
89 | #include "zfs_namecheck.h" | |
90 | ||
91 | /* | |
92 | * Two AVL trees are maintained which contain all currently automounted | |
93 | * snapshots. Every automounted snapshot maps to a single zfs_snapentry_t | |
94 | * entry which MUST: | |
95 | * | |
96 | * - be attached to both trees, and | |
97 | * - be unique, no duplicate entries are allowed. | |
98 | * | |
99 | * The zfs_snapshots_by_name tree is indexed by the full dataset name | |
100 | * while the zfs_snapshots_by_objsetid tree is indexed by the unique | |
101 | * objsetid. This allows for fast lookups either by name or objsetid. | |
102 | */ | |
103 | static avl_tree_t zfs_snapshots_by_name; | |
104 | static avl_tree_t zfs_snapshots_by_objsetid; | |
105 | static krwlock_t zfs_snapshot_lock; | |
106 | ||
107 | /* | |
108 | * Control Directory Tunables (.zfs) | |
109 | */ | |
110 | int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; | |
111 | int zfs_admin_snapshot = 0; | |
112 | ||
113 | /* | |
114 | * Dedicated task queue for unmounting snapshots. | |
115 | */ | |
116 | static taskq_t *zfs_expire_taskq; | |
117 | ||
118 | typedef struct { | |
119 | char *se_name; /* full snapshot name */ | |
120 | char *se_path; /* full mount path */ | |
121 | spa_t *se_spa; /* pool spa */ | |
122 | uint64_t se_objsetid; /* snapshot objset id */ | |
123 | struct dentry *se_root_dentry; /* snapshot root dentry */ | |
124 | taskqid_t se_taskqid; /* scheduled unmount taskqid */ | |
125 | avl_node_t se_node_name; /* zfs_snapshots_by_name link */ | |
126 | avl_node_t se_node_objsetid; /* zfs_snapshots_by_objsetid link */ | |
127 | refcount_t se_refcount; /* reference count */ | |
128 | } zfs_snapentry_t; | |
129 | ||
130 | static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay); | |
131 | ||
132 | /* | |
133 | * Allocate a new zfs_snapentry_t being careful to make a copy of the | |
134 | * the snapshot name and provided mount point. No reference is taken. | |
135 | */ | |
136 | static zfs_snapentry_t * | |
137 | zfsctl_snapshot_alloc(char *full_name, char *full_path, spa_t *spa, | |
138 | uint64_t objsetid, struct dentry *root_dentry) | |
139 | { | |
140 | zfs_snapentry_t *se; | |
141 | ||
142 | se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); | |
143 | ||
144 | se->se_name = strdup(full_name); | |
145 | se->se_path = strdup(full_path); | |
146 | se->se_spa = spa; | |
147 | se->se_objsetid = objsetid; | |
148 | se->se_root_dentry = root_dentry; | |
149 | se->se_taskqid = -1; | |
150 | ||
151 | refcount_create(&se->se_refcount); | |
152 | ||
153 | return (se); | |
154 | } | |
155 | ||
156 | /* | |
157 | * Free a zfs_snapentry_t the called must ensure there are no active | |
158 | * references. | |
159 | */ | |
160 | static void | |
161 | zfsctl_snapshot_free(zfs_snapentry_t *se) | |
162 | { | |
163 | refcount_destroy(&se->se_refcount); | |
164 | strfree(se->se_name); | |
165 | strfree(se->se_path); | |
166 | ||
167 | kmem_free(se, sizeof (zfs_snapentry_t)); | |
168 | } | |
169 | ||
170 | /* | |
171 | * Hold a reference on the zfs_snapentry_t. | |
172 | */ | |
173 | static void | |
174 | zfsctl_snapshot_hold(zfs_snapentry_t *se) | |
175 | { | |
176 | refcount_add(&se->se_refcount, NULL); | |
177 | } | |
178 | ||
179 | /* | |
180 | * Release a reference on the zfs_snapentry_t. When the number of | |
181 | * references drops to zero the structure will be freed. | |
182 | */ | |
183 | static void | |
184 | zfsctl_snapshot_rele(zfs_snapentry_t *se) | |
185 | { | |
186 | if (refcount_remove(&se->se_refcount, NULL) == 0) | |
187 | zfsctl_snapshot_free(se); | |
188 | } | |
189 | ||
190 | /* | |
191 | * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and | |
192 | * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part | |
193 | * of the trees a reference is held. | |
194 | */ | |
195 | static void | |
196 | zfsctl_snapshot_add(zfs_snapentry_t *se) | |
197 | { | |
198 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); | |
199 | refcount_add(&se->se_refcount, NULL); | |
200 | avl_add(&zfs_snapshots_by_name, se); | |
201 | avl_add(&zfs_snapshots_by_objsetid, se); | |
202 | } | |
203 | ||
204 | /* | |
205 | * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and | |
206 | * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped, | |
207 | * this can result in the structure being freed if that was the last | |
208 | * remaining reference. | |
209 | */ | |
210 | static void | |
211 | zfsctl_snapshot_remove(zfs_snapentry_t *se) | |
212 | { | |
213 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); | |
214 | avl_remove(&zfs_snapshots_by_name, se); | |
215 | avl_remove(&zfs_snapshots_by_objsetid, se); | |
216 | zfsctl_snapshot_rele(se); | |
217 | } | |
218 | ||
219 | /* | |
220 | * Snapshot name comparison function for the zfs_snapshots_by_name. | |
221 | */ | |
222 | static int | |
223 | snapentry_compare_by_name(const void *a, const void *b) | |
224 | { | |
225 | const zfs_snapentry_t *se_a = a; | |
226 | const zfs_snapentry_t *se_b = b; | |
227 | int ret; | |
228 | ||
229 | ret = strcmp(se_a->se_name, se_b->se_name); | |
230 | ||
231 | if (ret < 0) | |
232 | return (-1); | |
233 | else if (ret > 0) | |
234 | return (1); | |
235 | else | |
236 | return (0); | |
237 | } | |
238 | ||
239 | /* | |
240 | * Snapshot name comparison function for the zfs_snapshots_by_objsetid. | |
241 | */ | |
242 | static int | |
243 | snapentry_compare_by_objsetid(const void *a, const void *b) | |
244 | { | |
245 | const zfs_snapentry_t *se_a = a; | |
246 | const zfs_snapentry_t *se_b = b; | |
247 | ||
248 | if (se_a->se_spa != se_b->se_spa) | |
249 | return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1); | |
250 | ||
251 | if (se_a->se_objsetid < se_b->se_objsetid) | |
252 | return (-1); | |
253 | else if (se_a->se_objsetid > se_b->se_objsetid) | |
254 | return (1); | |
255 | else | |
256 | return (0); | |
257 | } | |
258 | ||
259 | /* | |
260 | * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname | |
261 | * is found a pointer to the zfs_snapentry_t is returned and a reference | |
262 | * taken on the structure. The caller is responsible for dropping the | |
263 | * reference with zfsctl_snapshot_rele(). If the snapname is not found | |
264 | * NULL will be returned. | |
265 | */ | |
266 | static zfs_snapentry_t * | |
267 | zfsctl_snapshot_find_by_name(char *snapname) | |
268 | { | |
269 | zfs_snapentry_t *se, search; | |
270 | ||
271 | ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); | |
272 | ||
273 | search.se_name = snapname; | |
274 | se = avl_find(&zfs_snapshots_by_name, &search, NULL); | |
275 | if (se) | |
276 | refcount_add(&se->se_refcount, NULL); | |
277 | ||
278 | return (se); | |
279 | } | |
280 | ||
281 | /* | |
282 | * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id | |
283 | * rather than the snapname. In all other respects it behaves the same | |
284 | * as zfsctl_snapshot_find_by_name(). | |
285 | */ | |
286 | static zfs_snapentry_t * | |
287 | zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) | |
288 | { | |
289 | zfs_snapentry_t *se, search; | |
290 | ||
291 | ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); | |
292 | ||
293 | search.se_spa = spa; | |
294 | search.se_objsetid = objsetid; | |
295 | se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); | |
296 | if (se) | |
297 | refcount_add(&se->se_refcount, NULL); | |
298 | ||
299 | return (se); | |
300 | } | |
301 | ||
302 | /* | |
303 | * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is | |
304 | * removed, renamed, and added back to the new correct location in the tree. | |
305 | */ | |
306 | static int | |
307 | zfsctl_snapshot_rename(char *old_snapname, char *new_snapname) | |
308 | { | |
309 | zfs_snapentry_t *se; | |
310 | ||
311 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); | |
312 | ||
313 | se = zfsctl_snapshot_find_by_name(old_snapname); | |
314 | if (se == NULL) | |
315 | return (ENOENT); | |
316 | ||
317 | zfsctl_snapshot_remove(se); | |
318 | strfree(se->se_name); | |
319 | se->se_name = strdup(new_snapname); | |
320 | zfsctl_snapshot_add(se); | |
321 | zfsctl_snapshot_rele(se); | |
322 | ||
323 | return (0); | |
324 | } | |
325 | ||
326 | /* | |
327 | * Delayed task responsible for unmounting an expired automounted snapshot. | |
328 | */ | |
329 | static void | |
330 | snapentry_expire(void *data) | |
331 | { | |
332 | zfs_snapentry_t *se = (zfs_snapentry_t *)data; | |
333 | spa_t *spa = se->se_spa; | |
334 | uint64_t objsetid = se->se_objsetid; | |
335 | ||
336 | if (zfs_expire_snapshot <= 0) { | |
337 | zfsctl_snapshot_rele(se); | |
338 | return; | |
339 | } | |
340 | ||
341 | se->se_taskqid = -1; | |
342 | (void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE); | |
343 | zfsctl_snapshot_rele(se); | |
344 | ||
345 | /* | |
346 | * Reschedule the unmount if the zfs_snapentry_t wasn't removed. | |
347 | * This can occur when the snapshot is busy. | |
348 | */ | |
349 | rw_enter(&zfs_snapshot_lock, RW_READER); | |
350 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { | |
351 | zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); | |
352 | zfsctl_snapshot_rele(se); | |
353 | } | |
354 | rw_exit(&zfs_snapshot_lock); | |
355 | } | |
356 | ||
357 | /* | |
358 | * Cancel an automatic unmount of a snapname. This callback is responsible | |
359 | * for dropping the reference on the zfs_snapentry_t which was taken when | |
360 | * during dispatch. | |
361 | */ | |
362 | static void | |
363 | zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se) | |
364 | { | |
365 | ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); | |
366 | ||
367 | if (taskq_cancel_id(zfs_expire_taskq, se->se_taskqid) == 0) { | |
368 | se->se_taskqid = -1; | |
369 | zfsctl_snapshot_rele(se); | |
370 | } | |
371 | } | |
372 | ||
373 | /* | |
374 | * Dispatch the unmount task for delayed handling with a hold protecting it. | |
375 | */ | |
376 | static void | |
377 | zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay) | |
378 | { | |
379 | ASSERT3S(se->se_taskqid, ==, -1); | |
380 | ||
381 | if (delay <= 0) | |
382 | return; | |
383 | ||
384 | zfsctl_snapshot_hold(se); | |
385 | se->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq, | |
386 | snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ); | |
387 | } | |
388 | ||
389 | /* | |
390 | * Schedule an automatic unmount of objset id to occur in delay seconds from | |
391 | * now. Any previous delayed unmount will be cancelled in favor of the | |
392 | * updated deadline. A reference is taken by zfsctl_snapshot_find_by_name() | |
393 | * and held until the outstanding task is handled or cancelled. | |
394 | */ | |
395 | int | |
396 | zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay) | |
397 | { | |
398 | zfs_snapentry_t *se; | |
399 | int error = ENOENT; | |
400 | ||
401 | rw_enter(&zfs_snapshot_lock, RW_READER); | |
402 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { | |
403 | zfsctl_snapshot_unmount_cancel(se); | |
404 | zfsctl_snapshot_unmount_delay_impl(se, delay); | |
405 | zfsctl_snapshot_rele(se); | |
406 | error = 0; | |
407 | } | |
408 | rw_exit(&zfs_snapshot_lock); | |
409 | ||
410 | return (error); | |
411 | } | |
412 | ||
413 | /* | |
414 | * Check if snapname is currently mounted. Returned non-zero when mounted | |
415 | * and zero when unmounted. | |
416 | */ | |
417 | static boolean_t | |
418 | zfsctl_snapshot_ismounted(char *snapname) | |
419 | { | |
420 | zfs_snapentry_t *se; | |
421 | boolean_t ismounted = B_FALSE; | |
422 | ||
423 | rw_enter(&zfs_snapshot_lock, RW_READER); | |
424 | if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) { | |
425 | zfsctl_snapshot_rele(se); | |
426 | ismounted = B_TRUE; | |
427 | } | |
428 | rw_exit(&zfs_snapshot_lock); | |
429 | ||
430 | return (ismounted); | |
431 | } | |
432 | ||
433 | /* | |
434 | * Check if the given inode is a part of the virtual .zfs directory. | |
435 | */ | |
436 | boolean_t | |
437 | zfsctl_is_node(struct inode *ip) | |
438 | { | |
439 | return (ITOZ(ip)->z_is_ctldir); | |
440 | } | |
441 | ||
442 | /* | |
443 | * Check if the given inode is a .zfs/snapshots/snapname directory. | |
444 | */ | |
445 | boolean_t | |
446 | zfsctl_is_snapdir(struct inode *ip) | |
447 | { | |
448 | return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); | |
449 | } | |
450 | ||
451 | /* | |
452 | * Allocate a new inode with the passed id and ops. | |
453 | */ | |
454 | static struct inode * | |
455 | zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id, | |
456 | const struct file_operations *fops, const struct inode_operations *ops) | |
457 | { | |
df9d7621 | 458 | struct timespec now; |
87d546d8 TG |
459 | struct inode *ip; |
460 | znode_t *zp; | |
461 | ||
462 | ip = new_inode(zsb->z_sb); | |
463 | if (ip == NULL) | |
464 | return (NULL); | |
465 | ||
df9d7621 | 466 | now = current_time(ip); |
87d546d8 TG |
467 | zp = ITOZ(ip); |
468 | ASSERT3P(zp->z_dirlocks, ==, NULL); | |
469 | ASSERT3P(zp->z_acl_cached, ==, NULL); | |
470 | ASSERT3P(zp->z_xattr_cached, ==, NULL); | |
471 | zp->z_id = id; | |
472 | zp->z_unlinked = 0; | |
473 | zp->z_atime_dirty = 0; | |
474 | zp->z_zn_prefetch = 0; | |
475 | zp->z_moved = 0; | |
476 | zp->z_sa_hdl = NULL; | |
477 | zp->z_blksz = 0; | |
478 | zp->z_seq = 0; | |
479 | zp->z_mapcnt = 0; | |
480 | zp->z_gen = 0; | |
481 | zp->z_size = 0; | |
482 | zp->z_links = 0; | |
483 | zp->z_pflags = 0; | |
484 | zp->z_uid = 0; | |
485 | zp->z_gid = 0; | |
486 | zp->z_mode = 0; | |
487 | zp->z_sync_cnt = 0; | |
488 | zp->z_is_mapped = B_FALSE; | |
489 | zp->z_is_ctldir = B_TRUE; | |
490 | zp->z_is_sa = B_FALSE; | |
491 | zp->z_is_stale = B_FALSE; | |
492 | ip->i_ino = id; | |
493 | ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO); | |
494 | ip->i_uid = SUID_TO_KUID(0); | |
495 | ip->i_gid = SGID_TO_KGID(0); | |
496 | ip->i_blkbits = SPA_MINBLOCKSHIFT; | |
497 | ip->i_atime = now; | |
498 | ip->i_mtime = now; | |
499 | ip->i_ctime = now; | |
500 | ip->i_fop = fops; | |
501 | ip->i_op = ops; | |
5b5f25ca CIK |
502 | #if defined(IOP_XATTR) |
503 | ip->i_opflags &= ~IOP_XATTR; | |
504 | #endif | |
87d546d8 TG |
505 | |
506 | if (insert_inode_locked(ip)) { | |
507 | unlock_new_inode(ip); | |
508 | iput(ip); | |
509 | return (NULL); | |
510 | } | |
511 | ||
512 | mutex_enter(&zsb->z_znodes_lock); | |
513 | list_insert_tail(&zsb->z_all_znodes, zp); | |
514 | zsb->z_nr_znodes++; | |
515 | membar_producer(); | |
516 | mutex_exit(&zsb->z_znodes_lock); | |
517 | ||
518 | unlock_new_inode(ip); | |
519 | ||
520 | return (ip); | |
521 | } | |
522 | ||
523 | /* | |
524 | * Lookup the inode with given id, it will be allocated if needed. | |
525 | */ | |
526 | static struct inode * | |
527 | zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id, | |
528 | const struct file_operations *fops, const struct inode_operations *ops) | |
529 | { | |
530 | struct inode *ip = NULL; | |
531 | ||
532 | while (ip == NULL) { | |
533 | ip = ilookup(zsb->z_sb, (unsigned long)id); | |
534 | if (ip) | |
535 | break; | |
536 | ||
537 | /* May fail due to concurrent zfsctl_inode_alloc() */ | |
538 | ip = zfsctl_inode_alloc(zsb, id, fops, ops); | |
539 | } | |
540 | ||
541 | return (ip); | |
542 | } | |
543 | ||
544 | /* | |
545 | * Create the '.zfs' directory. This directory is cached as part of the VFS | |
546 | * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount() | |
547 | * therefore checks against a vfs_count of 2 instead of 1. This reference | |
548 | * is removed when the ctldir is destroyed in the unmount. All other entities | |
549 | * under the '.zfs' directory are created dynamically as needed. | |
550 | * | |
551 | * Because the dynamically created '.zfs' directory entries assume the use | |
552 | * of 64-bit inode numbers this support must be disabled on 32-bit systems. | |
553 | */ | |
554 | int | |
555 | zfsctl_create(zfs_sb_t *zsb) | |
556 | { | |
557 | #if defined(CONFIG_64BIT) | |
558 | ASSERT(zsb->z_ctldir == NULL); | |
559 | ||
560 | zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT, | |
561 | &zpl_fops_root, &zpl_ops_root); | |
562 | if (zsb->z_ctldir == NULL) | |
563 | return (SET_ERROR(ENOENT)); | |
564 | ||
565 | return (0); | |
566 | #else | |
567 | return (SET_ERROR(EOPNOTSUPP)); | |
568 | #endif /* CONFIG_64BIT */ | |
569 | } | |
570 | ||
571 | /* | |
572 | * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name. | |
573 | * Only called when the filesystem is unmounted. | |
574 | */ | |
575 | void | |
576 | zfsctl_destroy(zfs_sb_t *zsb) | |
577 | { | |
578 | if (zsb->z_issnap) { | |
579 | zfs_snapentry_t *se; | |
580 | spa_t *spa = zsb->z_os->os_spa; | |
581 | uint64_t objsetid = dmu_objset_id(zsb->z_os); | |
582 | ||
583 | rw_enter(&zfs_snapshot_lock, RW_WRITER); | |
584 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) | |
585 | != NULL) { | |
586 | zfsctl_snapshot_unmount_cancel(se); | |
587 | zfsctl_snapshot_remove(se); | |
588 | zfsctl_snapshot_rele(se); | |
589 | } | |
590 | rw_exit(&zfs_snapshot_lock); | |
591 | } else if (zsb->z_ctldir) { | |
592 | iput(zsb->z_ctldir); | |
593 | zsb->z_ctldir = NULL; | |
594 | } | |
595 | } | |
596 | ||
597 | /* | |
598 | * Given a root znode, retrieve the associated .zfs directory. | |
599 | * Add a hold to the vnode and return it. | |
600 | */ | |
601 | struct inode * | |
602 | zfsctl_root(znode_t *zp) | |
603 | { | |
604 | ASSERT(zfs_has_ctldir(zp)); | |
605 | igrab(ZTOZSB(zp)->z_ctldir); | |
606 | return (ZTOZSB(zp)->z_ctldir); | |
607 | } | |
608 | /* | |
609 | * Generate a long fid which includes the root object and objset of a | |
610 | * snapshot but not the generation number. For the root object the | |
611 | * generation number is ignored when zero to avoid needing to open | |
612 | * the dataset when generating fids for the snapshot names. | |
613 | */ | |
614 | static int | |
615 | zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp) | |
616 | { | |
617 | zfs_sb_t *zsb = ITOZSB(ip); | |
618 | zfid_short_t *zfid = (zfid_short_t *)fidp; | |
619 | zfid_long_t *zlfid = (zfid_long_t *)fidp; | |
620 | uint32_t gen = 0; | |
621 | uint64_t object; | |
622 | uint64_t objsetid; | |
623 | int i; | |
624 | ||
625 | object = zsb->z_root; | |
626 | objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino; | |
627 | zfid->zf_len = LONG_FID_LEN; | |
628 | ||
629 | for (i = 0; i < sizeof (zfid->zf_object); i++) | |
630 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
631 | ||
632 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
633 | zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); | |
634 | ||
635 | for (i = 0; i < sizeof (zlfid->zf_setid); i++) | |
636 | zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); | |
637 | ||
638 | for (i = 0; i < sizeof (zlfid->zf_setgen); i++) | |
639 | zlfid->zf_setgen[i] = 0; | |
640 | ||
641 | return (0); | |
642 | } | |
643 | ||
644 | /* | |
645 | * Generate an appropriate fid for an entry in the .zfs directory. | |
646 | */ | |
647 | int | |
648 | zfsctl_fid(struct inode *ip, fid_t *fidp) | |
649 | { | |
650 | znode_t *zp = ITOZ(ip); | |
651 | zfs_sb_t *zsb = ITOZSB(ip); | |
652 | uint64_t object = zp->z_id; | |
653 | zfid_short_t *zfid; | |
654 | int i; | |
655 | ||
656 | ZFS_ENTER(zsb); | |
657 | ||
658 | if (fidp->fid_len < SHORT_FID_LEN) { | |
659 | fidp->fid_len = SHORT_FID_LEN; | |
660 | ZFS_EXIT(zsb); | |
661 | return (SET_ERROR(ENOSPC)); | |
662 | } | |
663 | ||
664 | if (zfsctl_is_snapdir(ip)) { | |
665 | ZFS_EXIT(zsb); | |
666 | return (zfsctl_snapdir_fid(ip, fidp)); | |
667 | } | |
668 | ||
669 | zfid = (zfid_short_t *)fidp; | |
670 | ||
671 | zfid->zf_len = SHORT_FID_LEN; | |
672 | ||
673 | for (i = 0; i < sizeof (zfid->zf_object); i++) | |
674 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
675 | ||
676 | /* .zfs znodes always have a generation number of 0 */ | |
677 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
678 | zfid->zf_gen[i] = 0; | |
679 | ||
680 | ZFS_EXIT(zsb); | |
681 | return (0); | |
682 | } | |
683 | ||
684 | /* | |
685 | * Construct a full dataset name in full_name: "pool/dataset@snap_name" | |
686 | */ | |
687 | static int | |
688 | zfsctl_snapshot_name(zfs_sb_t *zsb, const char *snap_name, int len, | |
689 | char *full_name) | |
690 | { | |
691 | objset_t *os = zsb->z_os; | |
692 | ||
693 | if (zfs_component_namecheck(snap_name, NULL, NULL) != 0) | |
694 | return (SET_ERROR(EILSEQ)); | |
695 | ||
696 | dmu_objset_name(os, full_name); | |
697 | if ((strlen(full_name) + 1 + strlen(snap_name)) >= len) | |
698 | return (SET_ERROR(ENAMETOOLONG)); | |
699 | ||
700 | (void) strcat(full_name, "@"); | |
701 | (void) strcat(full_name, snap_name); | |
702 | ||
703 | return (0); | |
704 | } | |
705 | ||
706 | /* | |
707 | * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" | |
708 | */ | |
709 | static int | |
710 | zfsctl_snapshot_path(struct path *path, int len, char *full_path) | |
711 | { | |
712 | char *path_buffer, *path_ptr; | |
713 | int path_len, error = 0; | |
714 | ||
715 | path_buffer = kmem_alloc(len, KM_SLEEP); | |
716 | ||
717 | path_ptr = d_path(path, path_buffer, len); | |
718 | if (IS_ERR(path_ptr)) { | |
719 | error = -PTR_ERR(path_ptr); | |
720 | goto out; | |
721 | } | |
722 | ||
723 | path_len = path_buffer + len - 1 - path_ptr; | |
724 | if (path_len > len) { | |
725 | error = SET_ERROR(EFAULT); | |
726 | goto out; | |
727 | } | |
728 | ||
729 | memcpy(full_path, path_ptr, path_len); | |
730 | full_path[path_len] = '\0'; | |
731 | out: | |
732 | kmem_free(path_buffer, len); | |
733 | ||
734 | return (error); | |
735 | } | |
736 | ||
737 | /* | |
738 | * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" | |
739 | */ | |
740 | static int | |
741 | zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid, | |
742 | int path_len, char *full_path) | |
743 | { | |
744 | objset_t *os = zsb->z_os; | |
745 | fstrans_cookie_t cookie; | |
746 | char *snapname; | |
747 | boolean_t case_conflict; | |
748 | uint64_t id, pos = 0; | |
749 | int error = 0; | |
750 | ||
751 | if (zsb->z_mntopts->z_mntpoint == NULL) | |
752 | return (ENOENT); | |
753 | ||
754 | cookie = spl_fstrans_mark(); | |
755 | snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
756 | ||
757 | while (error == 0) { | |
758 | dsl_pool_config_enter(dmu_objset_pool(os), FTAG); | |
759 | error = dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN, | |
760 | snapname, &id, &pos, &case_conflict); | |
761 | dsl_pool_config_exit(dmu_objset_pool(os), FTAG); | |
762 | if (error) | |
763 | goto out; | |
764 | ||
765 | if (id == objsetid) | |
766 | break; | |
767 | } | |
768 | ||
769 | memset(full_path, 0, path_len); | |
770 | snprintf(full_path, path_len - 1, "%s/.zfs/snapshot/%s", | |
771 | zsb->z_mntopts->z_mntpoint, snapname); | |
772 | out: | |
773 | kmem_free(snapname, MAXNAMELEN); | |
774 | spl_fstrans_unmark(cookie); | |
775 | ||
776 | return (error); | |
777 | } | |
778 | ||
779 | /* | |
780 | * Special case the handling of "..". | |
781 | */ | |
782 | int | |
783 | zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp, | |
784 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
785 | { | |
786 | zfs_sb_t *zsb = ITOZSB(dip); | |
787 | int error = 0; | |
788 | ||
789 | ZFS_ENTER(zsb); | |
790 | ||
791 | if (strcmp(name, "..") == 0) { | |
792 | *ipp = dip->i_sb->s_root->d_inode; | |
793 | } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { | |
794 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR, | |
795 | &zpl_fops_snapdir, &zpl_ops_snapdir); | |
796 | } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { | |
797 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES, | |
798 | &zpl_fops_shares, &zpl_ops_shares); | |
799 | } else { | |
800 | *ipp = NULL; | |
801 | } | |
802 | ||
803 | if (*ipp == NULL) | |
804 | error = SET_ERROR(ENOENT); | |
805 | ||
806 | ZFS_EXIT(zsb); | |
807 | ||
808 | return (error); | |
809 | } | |
810 | ||
811 | /* | |
812 | * Lookup entry point for the 'snapshot' directory. Try to open the | |
813 | * snapshot if it exist, creating the pseudo filesystem inode as necessary. | |
814 | * Perform a mount of the associated dataset on top of the inode. | |
815 | */ | |
816 | int | |
817 | zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp, | |
818 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
819 | { | |
820 | zfs_sb_t *zsb = ITOZSB(dip); | |
821 | uint64_t id; | |
822 | int error; | |
823 | ||
824 | ZFS_ENTER(zsb); | |
825 | ||
826 | error = dmu_snapshot_lookup(zsb->z_os, name, &id); | |
827 | if (error) { | |
828 | ZFS_EXIT(zsb); | |
829 | return (error); | |
830 | } | |
831 | ||
832 | *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id, | |
833 | &simple_dir_operations, &simple_dir_inode_operations); | |
834 | if (*ipp == NULL) | |
835 | error = SET_ERROR(ENOENT); | |
836 | ||
837 | ZFS_EXIT(zsb); | |
838 | ||
839 | return (error); | |
840 | } | |
841 | ||
842 | /* | |
843 | * Renaming a directory under '.zfs/snapshot' will automatically trigger | |
844 | * a rename of the snapshot to the new given name. The rename is confined | |
845 | * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. | |
846 | */ | |
847 | int | |
848 | zfsctl_snapdir_rename(struct inode *sdip, char *snm, | |
849 | struct inode *tdip, char *tnm, cred_t *cr, int flags) | |
850 | { | |
851 | zfs_sb_t *zsb = ITOZSB(sdip); | |
852 | char *to, *from, *real, *fsname; | |
853 | int error; | |
854 | ||
855 | if (!zfs_admin_snapshot) | |
856 | return (EACCES); | |
857 | ||
858 | ZFS_ENTER(zsb); | |
859 | ||
860 | to = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
861 | from = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
862 | real = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
863 | fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
864 | ||
865 | if (zsb->z_case == ZFS_CASE_INSENSITIVE) { | |
866 | error = dmu_snapshot_realname(zsb->z_os, snm, real, | |
867 | MAXNAMELEN, NULL); | |
868 | if (error == 0) { | |
869 | snm = real; | |
870 | } else if (error != ENOTSUP) { | |
871 | goto out; | |
872 | } | |
873 | } | |
874 | ||
875 | dmu_objset_name(zsb->z_os, fsname); | |
876 | ||
877 | error = zfsctl_snapshot_name(ITOZSB(sdip), snm, MAXNAMELEN, from); | |
878 | if (error == 0) | |
879 | error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, MAXNAMELEN, to); | |
880 | if (error == 0) | |
881 | error = zfs_secpolicy_rename_perms(from, to, cr); | |
882 | if (error != 0) | |
883 | goto out; | |
884 | ||
885 | /* | |
886 | * Cannot move snapshots out of the snapdir. | |
887 | */ | |
888 | if (sdip != tdip) { | |
889 | error = SET_ERROR(EINVAL); | |
890 | goto out; | |
891 | } | |
892 | ||
893 | /* | |
894 | * No-op when names are identical. | |
895 | */ | |
896 | if (strcmp(snm, tnm) == 0) { | |
897 | error = 0; | |
898 | goto out; | |
899 | } | |
900 | ||
901 | rw_enter(&zfs_snapshot_lock, RW_WRITER); | |
902 | ||
903 | error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); | |
904 | if (error == 0) | |
905 | (void) zfsctl_snapshot_rename(snm, tnm); | |
906 | ||
907 | rw_exit(&zfs_snapshot_lock); | |
908 | out: | |
909 | kmem_free(from, MAXNAMELEN); | |
910 | kmem_free(to, MAXNAMELEN); | |
911 | kmem_free(real, MAXNAMELEN); | |
912 | kmem_free(fsname, MAXNAMELEN); | |
913 | ||
914 | ZFS_EXIT(zsb); | |
915 | ||
916 | return (error); | |
917 | } | |
918 | ||
919 | /* | |
920 | * Removing a directory under '.zfs/snapshot' will automatically trigger | |
921 | * the removal of the snapshot with the given name. | |
922 | */ | |
923 | int | |
924 | zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags) | |
925 | { | |
926 | zfs_sb_t *zsb = ITOZSB(dip); | |
927 | char *snapname, *real; | |
928 | int error; | |
929 | ||
930 | if (!zfs_admin_snapshot) | |
931 | return (EACCES); | |
932 | ||
933 | ZFS_ENTER(zsb); | |
934 | ||
935 | snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
936 | real = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
937 | ||
938 | if (zsb->z_case == ZFS_CASE_INSENSITIVE) { | |
939 | error = dmu_snapshot_realname(zsb->z_os, name, real, | |
940 | MAXNAMELEN, NULL); | |
941 | if (error == 0) { | |
942 | name = real; | |
943 | } else if (error != ENOTSUP) { | |
944 | goto out; | |
945 | } | |
946 | } | |
947 | ||
948 | error = zfsctl_snapshot_name(ITOZSB(dip), name, MAXNAMELEN, snapname); | |
949 | if (error == 0) | |
950 | error = zfs_secpolicy_destroy_perms(snapname, cr); | |
951 | if (error != 0) | |
952 | goto out; | |
953 | ||
954 | error = zfsctl_snapshot_unmount(snapname, MNT_FORCE); | |
955 | if ((error == 0) || (error == ENOENT)) | |
956 | error = dsl_destroy_snapshot(snapname, B_FALSE); | |
957 | out: | |
958 | kmem_free(snapname, MAXNAMELEN); | |
959 | kmem_free(real, MAXNAMELEN); | |
960 | ||
961 | ZFS_EXIT(zsb); | |
962 | ||
963 | return (error); | |
964 | } | |
965 | ||
966 | /* | |
967 | * Creating a directory under '.zfs/snapshot' will automatically trigger | |
968 | * the creation of a new snapshot with the given name. | |
969 | */ | |
970 | int | |
971 | zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap, | |
972 | struct inode **ipp, cred_t *cr, int flags) | |
973 | { | |
974 | zfs_sb_t *zsb = ITOZSB(dip); | |
975 | char *dsname; | |
976 | int error; | |
977 | ||
978 | if (!zfs_admin_snapshot) | |
979 | return (EACCES); | |
980 | ||
981 | dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP); | |
982 | ||
983 | if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { | |
984 | error = SET_ERROR(EILSEQ); | |
985 | goto out; | |
986 | } | |
987 | ||
988 | dmu_objset_name(zsb->z_os, dsname); | |
989 | ||
990 | error = zfs_secpolicy_snapshot_perms(dsname, cr); | |
991 | if (error != 0) | |
992 | goto out; | |
993 | ||
994 | if (error == 0) { | |
995 | error = dmu_objset_snapshot_one(dsname, dirname); | |
996 | if (error != 0) | |
997 | goto out; | |
998 | ||
999 | error = zfsctl_snapdir_lookup(dip, dirname, ipp, | |
1000 | 0, cr, NULL, NULL); | |
1001 | } | |
1002 | out: | |
1003 | kmem_free(dsname, MAXNAMELEN); | |
1004 | ||
1005 | return (error); | |
1006 | } | |
1007 | ||
1008 | /* | |
1009 | * Attempt to unmount a snapshot by making a call to user space. | |
1010 | * There is no assurance that this can or will succeed, is just a | |
1011 | * best effort. In the case where it does fail, perhaps because | |
1012 | * it's in use, the unmount will fail harmlessly. | |
1013 | */ | |
1014 | int | |
1015 | zfsctl_snapshot_unmount(char *snapname, int flags) | |
1016 | { | |
1017 | char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL, | |
1018 | NULL }; | |
1019 | char *envp[] = { NULL }; | |
1020 | zfs_snapentry_t *se; | |
1021 | int error; | |
1022 | ||
1023 | rw_enter(&zfs_snapshot_lock, RW_READER); | |
1024 | if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) { | |
1025 | rw_exit(&zfs_snapshot_lock); | |
1026 | return (ENOENT); | |
1027 | } | |
1028 | rw_exit(&zfs_snapshot_lock); | |
1029 | ||
1030 | if (flags & MNT_FORCE) | |
1031 | argv[4] = "-fn"; | |
1032 | argv[5] = se->se_path; | |
1033 | dprintf("unmount; path=%s\n", se->se_path); | |
1034 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | |
1035 | zfsctl_snapshot_rele(se); | |
1036 | ||
1037 | ||
1038 | /* | |
1039 | * The umount system utility will return 256 on error. We must | |
1040 | * assume this error is because the file system is busy so it is | |
1041 | * converted to the more sensible EBUSY. | |
1042 | */ | |
1043 | if (error) | |
1044 | error = SET_ERROR(EBUSY); | |
1045 | ||
1046 | return (error); | |
1047 | } | |
1048 | ||
1049 | #define MOUNT_BUSY 0x80 /* Mount failed due to EBUSY (from mntent.h) */ | |
1050 | ||
1051 | int | |
1052 | zfsctl_snapshot_mount(struct path *path, int flags) | |
1053 | { | |
1054 | struct dentry *dentry = path->dentry; | |
1055 | struct inode *ip = dentry->d_inode; | |
1056 | zfs_sb_t *zsb; | |
1057 | zfs_sb_t *snap_zsb; | |
1058 | zfs_snapentry_t *se; | |
1059 | char *full_name, *full_path; | |
1060 | char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL, | |
1061 | NULL }; | |
1062 | char *envp[] = { NULL }; | |
1063 | int error; | |
1064 | struct path spath; | |
1065 | ||
1066 | if (ip == NULL) | |
1067 | return (EISDIR); | |
1068 | ||
1069 | zsb = ITOZSB(ip); | |
1070 | ZFS_ENTER(zsb); | |
1071 | ||
1072 | full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP); | |
1073 | full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); | |
1074 | ||
1075 | error = zfsctl_snapshot_name(zsb, dname(dentry), | |
1076 | MAXNAMELEN, full_name); | |
1077 | if (error) | |
1078 | goto error; | |
1079 | ||
1080 | error = zfsctl_snapshot_path(path, MAXPATHLEN, full_path); | |
1081 | if (error) | |
1082 | goto error; | |
1083 | ||
1084 | /* | |
1085 | * Multiple concurrent automounts of a snapshot are never allowed. | |
1086 | * The snapshot may be manually mounted as many times as desired. | |
1087 | */ | |
1088 | if (zfsctl_snapshot_ismounted(full_name)) { | |
1089 | error = 0; | |
1090 | goto error; | |
1091 | } | |
1092 | ||
1093 | /* | |
1094 | * Attempt to mount the snapshot from user space. Normally this | |
1095 | * would be done using the vfs_kern_mount() function, however that | |
1096 | * function is marked GPL-only and cannot be used. On error we | |
1097 | * careful to log the real error to the console and return EISDIR | |
1098 | * to safely abort the automount. This should be very rare. | |
1099 | * | |
1100 | * If the user mode helper happens to return EBUSY, a concurrent | |
1101 | * mount is already in progress in which case the error is ignored. | |
1102 | * Take note that if the program was executed successfully the return | |
1103 | * value from call_usermodehelper() will be (exitcode << 8 + signal). | |
1104 | */ | |
1105 | dprintf("mount; name=%s path=%s\n", full_name, full_path); | |
1106 | argv[5] = full_name; | |
1107 | argv[6] = full_path; | |
1108 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | |
1109 | if (error) { | |
1110 | if (!(error & MOUNT_BUSY << 8)) { | |
1111 | cmn_err(CE_WARN, "Unable to automount %s/%s: %d", | |
1112 | full_path, full_name, error); | |
1113 | error = SET_ERROR(EISDIR); | |
1114 | } else { | |
1115 | /* | |
1116 | * EBUSY, this could mean a concurrent mount, or the | |
1117 | * snapshot has already been mounted at completely | |
1118 | * different place. We return 0 so VFS will retry. For | |
1119 | * the latter case the VFS will retry several times | |
1120 | * and return ELOOP, which is probably not a very good | |
1121 | * behavior. | |
1122 | */ | |
1123 | error = 0; | |
1124 | } | |
1125 | goto error; | |
1126 | } | |
1127 | ||
1128 | /* | |
1129 | * Follow down in to the mounted snapshot and set MNT_SHRINKABLE | |
1130 | * to identify this as an automounted filesystem. | |
1131 | */ | |
1132 | spath = *path; | |
1133 | path_get(&spath); | |
1134 | if (zpl_follow_down_one(&spath)) { | |
1135 | snap_zsb = ITOZSB(spath.dentry->d_inode); | |
1136 | snap_zsb->z_parent = zsb; | |
1137 | dentry = spath.dentry; | |
1138 | spath.mnt->mnt_flags |= MNT_SHRINKABLE; | |
1139 | ||
1140 | rw_enter(&zfs_snapshot_lock, RW_WRITER); | |
1141 | se = zfsctl_snapshot_alloc(full_name, full_path, | |
1142 | snap_zsb->z_os->os_spa, dmu_objset_id(snap_zsb->z_os), | |
1143 | dentry); | |
1144 | zfsctl_snapshot_add(se); | |
1145 | zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); | |
1146 | rw_exit(&zfs_snapshot_lock); | |
1147 | } | |
1148 | path_put(&spath); | |
1149 | error: | |
1150 | kmem_free(full_name, MAXNAMELEN); | |
1151 | kmem_free(full_path, MAXPATHLEN); | |
1152 | ||
1153 | ZFS_EXIT(zsb); | |
1154 | ||
1155 | return (error); | |
1156 | } | |
1157 | ||
1158 | /* | |
1159 | * Given the objset id of the snapshot return its zfs_sb_t as zsbp. | |
1160 | */ | |
1161 | int | |
1162 | zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp) | |
1163 | { | |
1164 | zfs_snapentry_t *se; | |
1165 | int error; | |
1166 | spa_t *spa = ((zfs_sb_t *)(sb->s_fs_info))->z_os->os_spa; | |
1167 | ||
1168 | /* | |
1169 | * Verify that the snapshot is mounted then lookup the mounted root | |
1170 | * rather than the covered mount point. This may fail if the | |
1171 | * snapshot has just been unmounted by an unrelated user space | |
1172 | * process. This race cannot occur to an expired mount point | |
1173 | * because we hold the zfs_snapshot_lock to prevent the race. | |
1174 | */ | |
1175 | rw_enter(&zfs_snapshot_lock, RW_READER); | |
1176 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { | |
1177 | zfs_sb_t *zsb; | |
1178 | ||
1179 | zsb = ITOZSB(se->se_root_dentry->d_inode); | |
1180 | ASSERT3U(dmu_objset_id(zsb->z_os), ==, objsetid); | |
1181 | ||
1182 | if (time_after(jiffies, zsb->z_snap_defer_time + | |
1183 | MAX(zfs_expire_snapshot * HZ / 2, HZ))) { | |
1184 | zsb->z_snap_defer_time = jiffies; | |
1185 | zfsctl_snapshot_unmount_cancel(se); | |
1186 | zfsctl_snapshot_unmount_delay_impl(se, | |
1187 | zfs_expire_snapshot); | |
1188 | } | |
1189 | ||
1190 | *zsbp = zsb; | |
1191 | zfsctl_snapshot_rele(se); | |
1192 | error = SET_ERROR(0); | |
1193 | } else { | |
1194 | error = SET_ERROR(ENOENT); | |
1195 | } | |
1196 | rw_exit(&zfs_snapshot_lock); | |
1197 | ||
1198 | /* | |
1199 | * Automount the snapshot given the objset id by constructing the | |
1200 | * full mount point and performing a traversal. | |
1201 | */ | |
1202 | if (error == ENOENT) { | |
1203 | struct path path; | |
1204 | char *mnt; | |
1205 | ||
1206 | mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP); | |
1207 | error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid, | |
1208 | MAXPATHLEN, mnt); | |
1209 | if (error) { | |
1210 | kmem_free(mnt, MAXPATHLEN); | |
1211 | return (SET_ERROR(error)); | |
1212 | } | |
1213 | ||
1214 | error = kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); | |
1215 | if (error == 0) { | |
1216 | *zsbp = ITOZSB(path.dentry->d_inode); | |
1217 | path_put(&path); | |
1218 | } | |
1219 | ||
1220 | kmem_free(mnt, MAXPATHLEN); | |
1221 | } | |
1222 | ||
1223 | return (error); | |
1224 | } | |
1225 | ||
1226 | int | |
1227 | zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, | |
1228 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
1229 | { | |
1230 | zfs_sb_t *zsb = ITOZSB(dip); | |
1231 | struct inode *ip; | |
1232 | znode_t *dzp; | |
1233 | int error; | |
1234 | ||
1235 | ZFS_ENTER(zsb); | |
1236 | ||
1237 | if (zsb->z_shares_dir == 0) { | |
1238 | ZFS_EXIT(zsb); | |
1239 | return (SET_ERROR(ENOTSUP)); | |
1240 | } | |
1241 | ||
1242 | error = zfs_zget(zsb, zsb->z_shares_dir, &dzp); | |
1243 | if (error) { | |
1244 | ZFS_EXIT(zsb); | |
1245 | return (error); | |
1246 | } | |
1247 | ||
1248 | error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL); | |
1249 | ||
1250 | iput(ZTOI(dzp)); | |
1251 | ZFS_EXIT(zsb); | |
1252 | ||
1253 | return (error); | |
1254 | } | |
1255 | ||
1256 | ||
1257 | /* | |
1258 | * Initialize the various pieces we'll need to create and manipulate .zfs | |
1259 | * directories. Currently this is unused but available. | |
1260 | */ | |
1261 | void | |
1262 | zfsctl_init(void) | |
1263 | { | |
1264 | avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name, | |
1265 | sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, | |
1266 | se_node_name)); | |
1267 | avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid, | |
1268 | sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, | |
1269 | se_node_objsetid)); | |
1270 | rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL); | |
1271 | ||
1272 | zfs_expire_taskq = taskq_create("z_unmount", 1, defclsyspri, | |
1273 | 1, 8, TASKQ_PREPOPULATE); | |
1274 | } | |
1275 | ||
1276 | /* | |
1277 | * Cleanup the various pieces we needed for .zfs directories. In particular | |
1278 | * ensure the expiry timer is canceled safely. | |
1279 | */ | |
1280 | void | |
1281 | zfsctl_fini(void) | |
1282 | { | |
1283 | taskq_destroy(zfs_expire_taskq); | |
1284 | ||
1285 | avl_destroy(&zfs_snapshots_by_name); | |
1286 | avl_destroy(&zfs_snapshots_by_objsetid); | |
1287 | rw_destroy(&zfs_snapshot_lock); | |
1288 | } | |
1289 | ||
/*
 * zfs_admin_snapshot: when zero, the mkdir and rmdir handlers for
 * '.zfs/snapshot' in this file refuse with EACCES.  Exposed as an int
 * module parameter with mode 0644.
 */
module_param(zfs_admin_snapshot, int, 0644);
MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot");

/*
 * zfs_expire_snapshot: delay, in seconds, handed to
 * zfsctl_snapshot_unmount_delay_impl() when an automounted snapshot is
 * registered or its expiry is deferred.  Exposed as an int module
 * parameter with mode 0644.
 */
module_param(zfs_expire_snapshot, int, 0644);
MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");