]>
Commit | Line | Data |
---|---|---|
ebe7e575 BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
22 | * | |
23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
24 | * Copyright (C) 2011 Lawrence Livermore National Security, LLC. | |
25 | * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). | |
26 | * LLNL-CODE-403049. | |
27 | * Rewritten for Linux by: | |
28 | * Rohan Puri <rohan.puri15@gmail.com> | |
29 | * Brian Behlendorf <behlendorf1@llnl.gov> | |
2e528b49 | 30 | * Copyright (c) 2013 by Delphix. All rights reserved. |
8adb798a | 31 | * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. |
877d925a | 32 | * Copyright (c) 2018 George Melikov. All Rights Reserved. |
6dbca94f | 33 | * Copyright (c) 2019 Datto, Inc. All rights reserved. |
b9007997 | 34 | * Copyright (c) 2020 The MathWorks, Inc. All rights reserved. |
ebe7e575 BB |
35 | */ |
36 | ||
37 | /* | |
38 | * ZFS control directory (a.k.a. ".zfs") | |
39 | * | |
40 | * This directory provides a common location for all ZFS meta-objects. | |
41 | * Currently, this is only the 'snapshot' and 'shares' directory, but this may | |
42 | * expand in the future. The elements are built dynamically, as the hierarchy | |
43 | * does not actually exist on disk. | |
44 | * | |
45 | * For 'snapshot', we don't want to have all snapshots always mounted, because | |
46 | * this would take up a huge amount of space in /etc/mnttab. We have three | |
47 | * types of objects: | |
48 | * | |
49 | * ctldir ------> snapshotdir -------> snapshot | |
50 | * | | |
51 | * | | |
52 | * V | |
53 | * mounted fs | |
54 | * | |
55 | * The 'snapshot' node contains just enough information to lookup '..' and act | |
56 | * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we | |
57 | * perform an automount of the underlying filesystem and return the | |
58 | * corresponding inode. | |
59 | * | |
60 | * All mounts are handled automatically by a user mode helper which invokes | |
a7628932 | 61 | * the mount procedure. Unmounts are handled by allowing the mount |
ebe7e575 BB |
62 | * point to expire so the kernel may automatically unmount it. |
63 | * | |
64 | * The '.zfs', '.zfs/snapshot', and all directories created under | |
65 | * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same | |
a7628932 | 66 | * zfsvfs_t as the head filesystem (what '.zfs' lives under). |
ebe7e575 BB |
67 | * |
68 | * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths | |
69 | * (ie: snapshots) are complete ZFS filesystems and have their own unique | |
0037b49e BB |
70 | * zfsvfs_t. However, the fsid reported by these mounts will be the same |
71 | * as that used by the parent zfsvfs_t to make NFS happy. | |
ebe7e575 BB |
72 | */ |
73 | ||
74 | #include <sys/types.h> | |
75 | #include <sys/param.h> | |
76 | #include <sys/time.h> | |
ebe7e575 BB |
77 | #include <sys/sysmacros.h> |
78 | #include <sys/pathname.h> | |
79 | #include <sys/vfs.h> | |
ebe7e575 BB |
80 | #include <sys/zfs_ctldir.h> |
81 | #include <sys/zfs_ioctl.h> | |
82 | #include <sys/zfs_vfsops.h> | |
83 | #include <sys/zfs_vnops.h> | |
84 | #include <sys/stat.h> | |
85 | #include <sys/dmu.h> | |
24ef51f6 | 86 | #include <sys/dmu_objset.h> |
13fe0198 | 87 | #include <sys/dsl_destroy.h> |
ebe7e575 | 88 | #include <sys/dsl_deleg.h> |
ebe7e575 | 89 | #include <sys/zpl.h> |
c3e5907f | 90 | #include <sys/mntent.h> |
ebe7e575 BB |
91 | #include "zfs_namecheck.h" |
92 | ||
278bee93 BB |
93 | /* |
94 | * Two AVL trees are maintained which contain all currently automounted | |
95 | * snapshots. Every automounted snapshot maps to a single zfs_snapentry_t | |
96 | * entry which MUST: | |
97 | * | |
98 | * - be attached to both trees, and | |
99 | * - be unique, no duplicate entries are allowed. | |
100 | * | |
101 | * The zfs_snapshots_by_name tree is indexed by the full dataset name | |
102 | * while the zfs_snapshots_by_objsetid tree is indexed by the unique | |
103 | * objsetid. This allows for fast lookups either by name or objsetid. | |
104 | */ | |
105 | static avl_tree_t zfs_snapshots_by_name; | |
106 | static avl_tree_t zfs_snapshots_by_objsetid; | |
5ed27c57 | 107 | static krwlock_t zfs_snapshot_lock; |
278bee93 | 108 | |
ebe7e575 BB |
109 | /* |
110 | * Control Directory Tunables (.zfs) | |
111 | */ | |
112 | int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; | |
18168da7 | 113 | static int zfs_admin_snapshot = 0; |
ebe7e575 | 114 | |
278bee93 BB |
115 | typedef struct { |
116 | char *se_name; /* full snapshot name */ | |
117 | char *se_path; /* full mount path */ | |
24ef51f6 | 118 | spa_t *se_spa; /* pool spa */ |
278bee93 BB |
119 | uint64_t se_objsetid; /* snapshot objset id */ |
120 | struct dentry *se_root_dentry; /* snapshot root dentry */ | |
e79b6807 | 121 | krwlock_t se_taskqid_lock; /* scheduled unmount taskqid lock */ |
278bee93 BB |
122 | taskqid_t se_taskqid; /* scheduled unmount taskqid */ |
123 | avl_node_t se_node_name; /* zfs_snapshots_by_name link */ | |
124 | avl_node_t se_node_objsetid; /* zfs_snapshots_by_objsetid link */ | |
c13060e4 | 125 | zfs_refcount_t se_refcount; /* reference count */ |
278bee93 BB |
126 | } zfs_snapentry_t; |
127 | ||
128 | static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay); | |
129 | ||
130 | /* | |
131 | * Allocate a new zfs_snapentry_t being careful to make a copy of the | |
132 | * the snapshot name and provided mount point. No reference is taken. | |
133 | */ | |
ebe7e575 | 134 | static zfs_snapentry_t * |
4d55ea81 | 135 | zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa, |
24ef51f6 | 136 | uint64_t objsetid, struct dentry *root_dentry) |
ebe7e575 | 137 | { |
278bee93 BB |
138 | zfs_snapentry_t *se; |
139 | ||
140 | se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); | |
141 | ||
e4f5fa12 MM |
142 | se->se_name = kmem_strdup(full_name); |
143 | se->se_path = kmem_strdup(full_path); | |
24ef51f6 | 144 | se->se_spa = spa; |
278bee93 BB |
145 | se->se_objsetid = objsetid; |
146 | se->se_root_dentry = root_dentry; | |
48d3eb40 | 147 | se->se_taskqid = TASKQID_INVALID; |
e79b6807 | 148 | rw_init(&se->se_taskqid_lock, NULL, RW_DEFAULT, NULL); |
278bee93 | 149 | |
424fd7c3 | 150 | zfs_refcount_create(&se->se_refcount); |
278bee93 BB |
151 | |
152 | return (se); | |
ebe7e575 BB |
153 | } |
154 | ||
278bee93 | 155 | /* |
a7628932 | 156 | * Free a zfs_snapentry_t the caller must ensure there are no active |
278bee93 BB |
157 | * references. |
158 | */ | |
159 | static void | |
160 | zfsctl_snapshot_free(zfs_snapentry_t *se) | |
ebe7e575 | 161 | { |
424fd7c3 | 162 | zfs_refcount_destroy(&se->se_refcount); |
e4f5fa12 MM |
163 | kmem_strfree(se->se_name); |
164 | kmem_strfree(se->se_path); | |
e79b6807 | 165 | rw_destroy(se->se_taskqid_lock); |
278bee93 BB |
166 | |
167 | kmem_free(se, sizeof (zfs_snapentry_t)); | |
ebe7e575 BB |
168 | } |
169 | ||
170 | /* | |
278bee93 | 171 | * Hold a reference on the zfs_snapentry_t. |
ebe7e575 BB |
172 | */ |
173 | static void | |
278bee93 | 174 | zfsctl_snapshot_hold(zfs_snapentry_t *se) |
ebe7e575 | 175 | { |
c13060e4 | 176 | zfs_refcount_add(&se->se_refcount, NULL); |
278bee93 BB |
177 | } |
178 | ||
179 | /* | |
180 | * Release a reference on the zfs_snapentry_t. When the number of | |
181 | * references drops to zero the structure will be freed. | |
182 | */ | |
183 | static void | |
184 | zfsctl_snapshot_rele(zfs_snapentry_t *se) | |
185 | { | |
424fd7c3 | 186 | if (zfs_refcount_remove(&se->se_refcount, NULL) == 0) |
278bee93 BB |
187 | zfsctl_snapshot_free(se); |
188 | } | |
ebe7e575 | 189 | |
278bee93 BB |
190 | /* |
191 | * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and | |
192 | * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part | |
193 | * of the trees a reference is held. | |
194 | */ | |
195 | static void | |
196 | zfsctl_snapshot_add(zfs_snapentry_t *se) | |
197 | { | |
5ed27c57 | 198 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); |
c8802ba0 | 199 | zfsctl_snapshot_hold(se); |
278bee93 BB |
200 | avl_add(&zfs_snapshots_by_name, se); |
201 | avl_add(&zfs_snapshots_by_objsetid, se); | |
ebe7e575 BB |
202 | } |
203 | ||
278bee93 BB |
204 | /* |
205 | * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and | |
206 | * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped, | |
207 | * this can result in the structure being freed if that was the last | |
208 | * remaining reference. | |
209 | */ | |
210 | static void | |
211 | zfsctl_snapshot_remove(zfs_snapentry_t *se) | |
212 | { | |
5ed27c57 | 213 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); |
278bee93 BB |
214 | avl_remove(&zfs_snapshots_by_name, se); |
215 | avl_remove(&zfs_snapshots_by_objsetid, se); | |
216 | zfsctl_snapshot_rele(se); | |
217 | } | |
218 | ||
219 | /* | |
220 | * Snapshot name comparison function for the zfs_snapshots_by_name. | |
221 | */ | |
222 | static int | |
223 | snapentry_compare_by_name(const void *a, const void *b) | |
ebe7e575 | 224 | { |
278bee93 BB |
225 | const zfs_snapentry_t *se_a = a; |
226 | const zfs_snapentry_t *se_b = b; | |
227 | int ret; | |
228 | ||
229 | ret = strcmp(se_a->se_name, se_b->se_name); | |
ebe7e575 BB |
230 | |
231 | if (ret < 0) | |
232 | return (-1); | |
233 | else if (ret > 0) | |
234 | return (1); | |
235 | else | |
236 | return (0); | |
237 | } | |
238 | ||
278bee93 BB |
239 | /* |
240 | * Snapshot name comparison function for the zfs_snapshots_by_objsetid. | |
241 | */ | |
242 | static int | |
243 | snapentry_compare_by_objsetid(const void *a, const void *b) | |
244 | { | |
245 | const zfs_snapentry_t *se_a = a; | |
246 | const zfs_snapentry_t *se_b = b; | |
247 | ||
24ef51f6 CC |
248 | if (se_a->se_spa != se_b->se_spa) |
249 | return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1); | |
250 | ||
278bee93 BB |
251 | if (se_a->se_objsetid < se_b->se_objsetid) |
252 | return (-1); | |
253 | else if (se_a->se_objsetid > se_b->se_objsetid) | |
254 | return (1); | |
255 | else | |
256 | return (0); | |
257 | } | |
258 | ||
259 | /* | |
260 | * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname | |
261 | * is found a pointer to the zfs_snapentry_t is returned and a reference | |
262 | * taken on the structure. The caller is responsible for dropping the | |
263 | * reference with zfsctl_snapshot_rele(). If the snapname is not found | |
264 | * NULL will be returned. | |
265 | */ | |
266 | static zfs_snapentry_t * | |
4d55ea81 | 267 | zfsctl_snapshot_find_by_name(const char *snapname) |
278bee93 BB |
268 | { |
269 | zfs_snapentry_t *se, search; | |
270 | ||
5ed27c57 | 271 | ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); |
278bee93 | 272 | |
4d55ea81 | 273 | search.se_name = (char *)snapname; |
278bee93 BB |
274 | se = avl_find(&zfs_snapshots_by_name, &search, NULL); |
275 | if (se) | |
c8802ba0 | 276 | zfsctl_snapshot_hold(se); |
278bee93 BB |
277 | |
278 | return (se); | |
279 | } | |
280 | ||
281 | /* | |
282 | * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id | |
283 | * rather than the snapname. In all other respects it behaves the same | |
284 | * as zfsctl_snapshot_find_by_name(). | |
285 | */ | |
286 | static zfs_snapentry_t * | |
24ef51f6 | 287 | zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) |
278bee93 BB |
288 | { |
289 | zfs_snapentry_t *se, search; | |
290 | ||
5ed27c57 | 291 | ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); |
278bee93 | 292 | |
24ef51f6 | 293 | search.se_spa = spa; |
278bee93 BB |
294 | search.se_objsetid = objsetid; |
295 | se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); | |
296 | if (se) | |
c8802ba0 | 297 | zfsctl_snapshot_hold(se); |
278bee93 BB |
298 | |
299 | return (se); | |
300 | } | |
301 | ||
302 | /* | |
303 | * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is | |
304 | * removed, renamed, and added back to the new correct location in the tree. | |
305 | */ | |
306 | static int | |
4d55ea81 | 307 | zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname) |
278bee93 BB |
308 | { |
309 | zfs_snapentry_t *se; | |
310 | ||
5ed27c57 | 311 | ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); |
278bee93 BB |
312 | |
313 | se = zfsctl_snapshot_find_by_name(old_snapname); | |
314 | if (se == NULL) | |
ecb2b7dc | 315 | return (SET_ERROR(ENOENT)); |
278bee93 BB |
316 | |
317 | zfsctl_snapshot_remove(se); | |
e4f5fa12 MM |
318 | kmem_strfree(se->se_name); |
319 | se->se_name = kmem_strdup(new_snapname); | |
278bee93 BB |
320 | zfsctl_snapshot_add(se); |
321 | zfsctl_snapshot_rele(se); | |
322 | ||
323 | return (0); | |
324 | } | |
325 | ||
326 | /* | |
327 | * Delayed task responsible for unmounting an expired automounted snapshot. | |
328 | */ | |
329 | static void | |
330 | snapentry_expire(void *data) | |
331 | { | |
332 | zfs_snapentry_t *se = (zfs_snapentry_t *)data; | |
24ef51f6 | 333 | spa_t *spa = se->se_spa; |
278bee93 BB |
334 | uint64_t objsetid = se->se_objsetid; |
335 | ||
5e94284f BB |
336 | if (zfs_expire_snapshot <= 0) { |
337 | zfsctl_snapshot_rele(se); | |
338 | return; | |
339 | } | |
340 | ||
e79b6807 | 341 | rw_enter(&se->se_taskqid_lock, RW_WRITER); |
48d3eb40 | 342 | se->se_taskqid = TASKQID_INVALID; |
e79b6807 | 343 | rw_exit(&se->se_taskqid_lock); |
278bee93 BB |
344 | (void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE); |
345 | zfsctl_snapshot_rele(se); | |
346 | ||
347 | /* | |
348 | * Reschedule the unmount if the zfs_snapentry_t wasn't removed. | |
349 | * This can occur when the snapshot is busy. | |
350 | */ | |
5ed27c57 | 351 | rw_enter(&zfs_snapshot_lock, RW_READER); |
24ef51f6 | 352 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { |
278bee93 BB |
353 | zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); |
354 | zfsctl_snapshot_rele(se); | |
355 | } | |
5ed27c57 | 356 | rw_exit(&zfs_snapshot_lock); |
278bee93 BB |
357 | } |
358 | ||
359 | /* | |
360 | * Cancel an automatic unmount of a snapname. This callback is responsible | |
361 | * for dropping the reference on the zfs_snapentry_t which was taken when | |
362 | * during dispatch. | |
363 | */ | |
364 | static void | |
365 | zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se) | |
366 | { | |
e79b6807 RE |
367 | int err = 0; |
368 | rw_enter(&se->se_taskqid_lock, RW_WRITER); | |
369 | err = taskq_cancel_id(system_delay_taskq, se->se_taskqid); | |
370 | /* | |
371 | * if we get ENOENT, the taskq couldn't be found to be | |
372 | * canceled, so we can just mark it as invalid because | |
373 | * it's already gone. If we got EBUSY, then we already | |
374 | * blocked until it was gone _anyway_, so we don't care. | |
375 | */ | |
376 | se->se_taskqid = TASKQID_INVALID; | |
377 | rw_exit(&se->se_taskqid_lock); | |
378 | if (err == 0) { | |
278bee93 BB |
379 | zfsctl_snapshot_rele(se); |
380 | } | |
381 | } | |
382 | ||
383 | /* | |
384 | * Dispatch the unmount task for delayed handling with a hold protecting it. | |
385 | */ | |
386 | static void | |
387 | zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay) | |
388 | { | |
278bee93 | 389 | |
5e94284f BB |
390 | if (delay <= 0) |
391 | return; | |
392 | ||
245b7ab3 | 393 | zfsctl_snapshot_hold(se); |
e79b6807 RE |
394 | rw_enter(&se->se_taskqid_lock, RW_WRITER); |
395 | ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID); | |
57ddcda1 | 396 | se->se_taskqid = taskq_dispatch_delay(system_delay_taskq, |
278bee93 | 397 | snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ); |
e79b6807 | 398 | rw_exit(&se->se_taskqid_lock); |
278bee93 BB |
399 | } |
400 | ||
401 | /* | |
402 | * Schedule an automatic unmount of objset id to occur in delay seconds from | |
403 | * now. Any previous delayed unmount will be cancelled in favor of the | |
404 | * updated deadline. A reference is taken by zfsctl_snapshot_find_by_name() | |
405 | * and held until the outstanding task is handled or cancelled. | |
406 | */ | |
407 | int | |
24ef51f6 | 408 | zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay) |
278bee93 BB |
409 | { |
410 | zfs_snapentry_t *se; | |
411 | int error = ENOENT; | |
412 | ||
5ed27c57 | 413 | rw_enter(&zfs_snapshot_lock, RW_READER); |
24ef51f6 | 414 | if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { |
278bee93 BB |
415 | zfsctl_snapshot_unmount_cancel(se); |
416 | zfsctl_snapshot_unmount_delay_impl(se, delay); | |
417 | zfsctl_snapshot_rele(se); | |
418 | error = 0; | |
419 | } | |
5ed27c57 | 420 | rw_exit(&zfs_snapshot_lock); |
278bee93 BB |
421 | |
422 | return (error); | |
423 | } | |
424 | ||
425 | /* | |
426 | * Check if snapname is currently mounted. Returned non-zero when mounted | |
427 | * and zero when unmounted. | |
428 | */ | |
429 | static boolean_t | |
4d55ea81 | 430 | zfsctl_snapshot_ismounted(const char *snapname) |
278bee93 BB |
431 | { |
432 | zfs_snapentry_t *se; | |
433 | boolean_t ismounted = B_FALSE; | |
434 | ||
5ed27c57 | 435 | rw_enter(&zfs_snapshot_lock, RW_READER); |
278bee93 BB |
436 | if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) { |
437 | zfsctl_snapshot_rele(se); | |
438 | ismounted = B_TRUE; | |
439 | } | |
5ed27c57 | 440 | rw_exit(&zfs_snapshot_lock); |
278bee93 BB |
441 | |
442 | return (ismounted); | |
443 | } | |
444 | ||
445 | /* | |
446 | * Check if the given inode is a part of the virtual .zfs directory. | |
447 | */ | |
ebe7e575 BB |
448 | boolean_t |
449 | zfsctl_is_node(struct inode *ip) | |
450 | { | |
451 | return (ITOZ(ip)->z_is_ctldir); | |
452 | } | |
453 | ||
278bee93 BB |
454 | /* |
455 | * Check if the given inode is a .zfs/snapshots/snapname directory. | |
456 | */ | |
ebe7e575 BB |
457 | boolean_t |
458 | zfsctl_is_snapdir(struct inode *ip) | |
459 | { | |
460 | return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); | |
461 | } | |
462 | ||
463 | /* | |
464 | * Allocate a new inode with the passed id and ops. | |
465 | */ | |
466 | static struct inode * | |
0037b49e | 467 | zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, |
ebe7e575 BB |
468 | const struct file_operations *fops, const struct inode_operations *ops) |
469 | { | |
6413c95f | 470 | inode_timespec_t now; |
ebe7e575 BB |
471 | struct inode *ip; |
472 | znode_t *zp; | |
473 | ||
0037b49e | 474 | ip = new_inode(zfsvfs->z_sb); |
ebe7e575 BB |
475 | if (ip == NULL) |
476 | return (NULL); | |
477 | ||
2946a1a1 | 478 | now = current_time(ip); |
ebe7e575 BB |
479 | zp = ITOZ(ip); |
480 | ASSERT3P(zp->z_dirlocks, ==, NULL); | |
481 | ASSERT3P(zp->z_acl_cached, ==, NULL); | |
482 | ASSERT3P(zp->z_xattr_cached, ==, NULL); | |
483 | zp->z_id = id; | |
a43570c5 TK |
484 | zp->z_unlinked = B_FALSE; |
485 | zp->z_atime_dirty = B_FALSE; | |
486 | zp->z_zn_prefetch = B_FALSE; | |
a43570c5 TK |
487 | zp->z_is_sa = B_FALSE; |
488 | zp->z_is_mapped = B_FALSE; | |
489 | zp->z_is_ctldir = B_TRUE; | |
490 | zp->z_is_stale = B_FALSE; | |
ebe7e575 BB |
491 | zp->z_sa_hdl = NULL; |
492 | zp->z_blksz = 0; | |
493 | zp->z_seq = 0; | |
494 | zp->z_mapcnt = 0; | |
ebe7e575 | 495 | zp->z_size = 0; |
ebe7e575 | 496 | zp->z_pflags = 0; |
ebe7e575 BB |
497 | zp->z_mode = 0; |
498 | zp->z_sync_cnt = 0; | |
278f2236 | 499 | ip->i_generation = 0; |
ebe7e575 | 500 | ip->i_ino = id; |
f74b821a | 501 | ip->i_mode = (S_IFDIR | S_IRWXUGO); |
570d6edf RY |
502 | ip->i_uid = SUID_TO_KUID(0); |
503 | ip->i_gid = SGID_TO_KGID(0); | |
ebe7e575 BB |
504 | ip->i_blkbits = SPA_MINBLOCKSHIFT; |
505 | ip->i_atime = now; | |
506 | ip->i_mtime = now; | |
507 | ip->i_ctime = now; | |
508 | ip->i_fop = fops; | |
509 | ip->i_op = ops; | |
9f7b066b | 510 | #if defined(IOP_XATTR) |
511 | ip->i_opflags &= ~IOP_XATTR; | |
512 | #endif | |
ebe7e575 BB |
513 | |
514 | if (insert_inode_locked(ip)) { | |
515 | unlock_new_inode(ip); | |
516 | iput(ip); | |
517 | return (NULL); | |
518 | } | |
519 | ||
0037b49e BB |
520 | mutex_enter(&zfsvfs->z_znodes_lock); |
521 | list_insert_tail(&zfsvfs->z_all_znodes, zp); | |
522 | zfsvfs->z_nr_znodes++; | |
ebe7e575 | 523 | membar_producer(); |
0037b49e | 524 | mutex_exit(&zfsvfs->z_znodes_lock); |
ebe7e575 BB |
525 | |
526 | unlock_new_inode(ip); | |
527 | ||
528 | return (ip); | |
529 | } | |
530 | ||
531 | /* | |
532 | * Lookup the inode with given id, it will be allocated if needed. | |
533 | */ | |
534 | static struct inode * | |
0037b49e | 535 | zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id, |
ebe7e575 BB |
536 | const struct file_operations *fops, const struct inode_operations *ops) |
537 | { | |
538 | struct inode *ip = NULL; | |
539 | ||
540 | while (ip == NULL) { | |
0037b49e | 541 | ip = ilookup(zfsvfs->z_sb, (unsigned long)id); |
ebe7e575 BB |
542 | if (ip) |
543 | break; | |
544 | ||
545 | /* May fail due to concurrent zfsctl_inode_alloc() */ | |
0037b49e | 546 | ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops); |
ebe7e575 BB |
547 | } |
548 | ||
549 | return (ip); | |
550 | } | |
551 | ||
ebe7e575 BB |
552 | /* |
553 | * Create the '.zfs' directory. This directory is cached as part of the VFS | |
0037b49e | 554 | * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount() |
ebe7e575 BB |
555 | * therefore checks against a vfs_count of 2 instead of 1. This reference |
556 | * is removed when the ctldir is destroyed in the unmount. All other entities | |
557 | * under the '.zfs' directory are created dynamically as needed. | |
fc173c85 BB |
558 | * |
559 | * Because the dynamically created '.zfs' directory entries assume the use | |
560 | * of 64-bit inode numbers this support must be disabled on 32-bit systems. | |
ebe7e575 BB |
561 | */ |
562 | int | |
0037b49e | 563 | zfsctl_create(zfsvfs_t *zfsvfs) |
ebe7e575 | 564 | { |
0037b49e | 565 | ASSERT(zfsvfs->z_ctldir == NULL); |
ebe7e575 | 566 | |
0037b49e | 567 | zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT, |
ebe7e575 | 568 | &zpl_fops_root, &zpl_ops_root); |
0037b49e | 569 | if (zfsvfs->z_ctldir == NULL) |
2e528b49 | 570 | return (SET_ERROR(ENOENT)); |
ebe7e575 BB |
571 | |
572 | return (0); | |
573 | } | |
574 | ||
575 | /* | |
278bee93 BB |
576 | * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name. |
577 | * Only called when the filesystem is unmounted. | |
ebe7e575 BB |
578 | */ |
579 | void | |
0037b49e | 580 | zfsctl_destroy(zfsvfs_t *zfsvfs) |
ebe7e575 | 581 | { |
0037b49e | 582 | if (zfsvfs->z_issnap) { |
278bee93 | 583 | zfs_snapentry_t *se; |
0037b49e BB |
584 | spa_t *spa = zfsvfs->z_os->os_spa; |
585 | uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); | |
278bee93 | 586 | |
5ed27c57 | 587 | rw_enter(&zfs_snapshot_lock, RW_WRITER); |
fd7265c6 RP |
588 | se = zfsctl_snapshot_find_by_objsetid(spa, objsetid); |
589 | if (se != NULL) | |
278bee93 | 590 | zfsctl_snapshot_remove(se); |
fd7265c6 RP |
591 | rw_exit(&zfs_snapshot_lock); |
592 | if (se != NULL) { | |
593 | zfsctl_snapshot_unmount_cancel(se); | |
278bee93 BB |
594 | zfsctl_snapshot_rele(se); |
595 | } | |
0037b49e BB |
596 | } else if (zfsvfs->z_ctldir) { |
597 | iput(zfsvfs->z_ctldir); | |
598 | zfsvfs->z_ctldir = NULL; | |
278bee93 | 599 | } |
ebe7e575 BB |
600 | } |
601 | ||
602 | /* | |
603 | * Given a root znode, retrieve the associated .zfs directory. | |
604 | * Add a hold to the vnode and return it. | |
605 | */ | |
606 | struct inode * | |
607 | zfsctl_root(znode_t *zp) | |
608 | { | |
609 | ASSERT(zfs_has_ctldir(zp)); | |
1daad981 AM |
610 | /* Must have an existing ref, so igrab() cannot return NULL */ |
611 | VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL); | |
ebe7e575 BB |
612 | return (ZTOZSB(zp)->z_ctldir); |
613 | } | |
9b77d1c9 | 614 | |
0500e835 | 615 | /* |
9b77d1c9 | 616 | * Generate a long fid to indicate a snapdir. We encode whether snapdir is |
e1cfd73f | 617 | * already mounted in gen field. We do this because nfsd lookup will not |
9b77d1c9 CC |
618 | * trigger automount. Next time the nfsd does fh_to_dentry, we will notice |
619 | * this and do automount and return ESTALE to force nfsd revalidate and follow | |
620 | * mount. | |
0500e835 BB |
621 | */ |
622 | static int | |
623 | zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp) | |
624 | { | |
0500e835 BB |
625 | zfid_short_t *zfid = (zfid_short_t *)fidp; |
626 | zfid_long_t *zlfid = (zfid_long_t *)fidp; | |
627 | uint32_t gen = 0; | |
628 | uint64_t object; | |
629 | uint64_t objsetid; | |
630 | int i; | |
9b77d1c9 CC |
631 | struct dentry *dentry; |
632 | ||
633 | if (fidp->fid_len < LONG_FID_LEN) { | |
634 | fidp->fid_len = LONG_FID_LEN; | |
635 | return (SET_ERROR(ENOSPC)); | |
636 | } | |
0500e835 | 637 | |
9b77d1c9 | 638 | object = ip->i_ino; |
0500e835 BB |
639 | objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino; |
640 | zfid->zf_len = LONG_FID_LEN; | |
641 | ||
9b77d1c9 CC |
642 | dentry = d_obtain_alias(igrab(ip)); |
643 | if (!IS_ERR(dentry)) { | |
644 | gen = !!d_mountpoint(dentry); | |
645 | dput(dentry); | |
646 | } | |
647 | ||
0500e835 BB |
648 | for (i = 0; i < sizeof (zfid->zf_object); i++) |
649 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
650 | ||
651 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
652 | zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); | |
653 | ||
654 | for (i = 0; i < sizeof (zlfid->zf_setid); i++) | |
655 | zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); | |
656 | ||
657 | for (i = 0; i < sizeof (zlfid->zf_setgen); i++) | |
658 | zlfid->zf_setgen[i] = 0; | |
659 | ||
660 | return (0); | |
661 | } | |
ebe7e575 | 662 | |
0500e835 BB |
663 | /* |
664 | * Generate an appropriate fid for an entry in the .zfs directory. | |
665 | */ | |
ebe7e575 BB |
666 | int |
667 | zfsctl_fid(struct inode *ip, fid_t *fidp) | |
668 | { | |
669 | znode_t *zp = ITOZ(ip); | |
0037b49e | 670 | zfsvfs_t *zfsvfs = ITOZSB(ip); |
ebe7e575 BB |
671 | uint64_t object = zp->z_id; |
672 | zfid_short_t *zfid; | |
673 | int i; | |
674 | ||
0037b49e | 675 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 676 | |
9b77d1c9 | 677 | if (zfsctl_is_snapdir(ip)) { |
0037b49e | 678 | ZFS_EXIT(zfsvfs); |
9b77d1c9 | 679 | return (zfsctl_snapdir_fid(ip, fidp)); |
ebe7e575 BB |
680 | } |
681 | ||
9b77d1c9 CC |
682 | if (fidp->fid_len < SHORT_FID_LEN) { |
683 | fidp->fid_len = SHORT_FID_LEN; | |
0037b49e | 684 | ZFS_EXIT(zfsvfs); |
9b77d1c9 | 685 | return (SET_ERROR(ENOSPC)); |
0500e835 BB |
686 | } |
687 | ||
ebe7e575 BB |
688 | zfid = (zfid_short_t *)fidp; |
689 | ||
690 | zfid->zf_len = SHORT_FID_LEN; | |
691 | ||
692 | for (i = 0; i < sizeof (zfid->zf_object); i++) | |
693 | zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); | |
694 | ||
695 | /* .zfs znodes always have a generation number of 0 */ | |
696 | for (i = 0; i < sizeof (zfid->zf_gen); i++) | |
697 | zfid->zf_gen[i] = 0; | |
698 | ||
0037b49e | 699 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
700 | return (0); |
701 | } | |
702 | ||
278bee93 BB |
703 | /* |
704 | * Construct a full dataset name in full_name: "pool/dataset@snap_name" | |
705 | */ | |
ebe7e575 | 706 | static int |
0037b49e | 707 | zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, |
278bee93 | 708 | char *full_name) |
ebe7e575 | 709 | { |
0037b49e | 710 | objset_t *os = zfsvfs->z_os; |
ebe7e575 | 711 | |
278bee93 | 712 | if (zfs_component_namecheck(snap_name, NULL, NULL) != 0) |
2e528b49 | 713 | return (SET_ERROR(EILSEQ)); |
ebe7e575 | 714 | |
278bee93 BB |
715 | dmu_objset_name(os, full_name); |
716 | if ((strlen(full_name) + 1 + strlen(snap_name)) >= len) | |
2e528b49 | 717 | return (SET_ERROR(ENAMETOOLONG)); |
ebe7e575 | 718 | |
278bee93 BB |
719 | (void) strcat(full_name, "@"); |
720 | (void) strcat(full_name, snap_name); | |
ebe7e575 BB |
721 | |
722 | return (0); | |
723 | } | |
724 | ||
0500e835 BB |
725 | /* |
726 | * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" | |
727 | */ | |
728 | static int | |
0037b49e | 729 | zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid, |
0500e835 BB |
730 | int path_len, char *full_path) |
731 | { | |
0037b49e | 732 | objset_t *os = zfsvfs->z_os; |
0500e835 BB |
733 | fstrans_cookie_t cookie; |
734 | char *snapname; | |
735 | boolean_t case_conflict; | |
736 | uint64_t id, pos = 0; | |
737 | int error = 0; | |
738 | ||
1c2555ef | 739 | if (zfsvfs->z_vfs->vfs_mntpoint == NULL) |
ecb2b7dc | 740 | return (SET_ERROR(ENOENT)); |
0500e835 BB |
741 | |
742 | cookie = spl_fstrans_mark(); | |
eca7b760 | 743 | snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
0500e835 BB |
744 | |
745 | while (error == 0) { | |
746 | dsl_pool_config_enter(dmu_objset_pool(os), FTAG); | |
0037b49e | 747 | error = dmu_snapshot_list_next(zfsvfs->z_os, |
eca7b760 IK |
748 | ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos, |
749 | &case_conflict); | |
0500e835 BB |
750 | dsl_pool_config_exit(dmu_objset_pool(os), FTAG); |
751 | if (error) | |
752 | goto out; | |
753 | ||
754 | if (id == objsetid) | |
755 | break; | |
756 | } | |
757 | ||
f0ce0436 | 758 | snprintf(full_path, path_len, "%s/.zfs/snapshot/%s", |
1c2555ef | 759 | zfsvfs->z_vfs->vfs_mntpoint, snapname); |
0500e835 | 760 | out: |
eca7b760 | 761 | kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); |
0500e835 BB |
762 | spl_fstrans_unmark(cookie); |
763 | ||
764 | return (error); | |
765 | } | |
766 | ||
ebe7e575 BB |
767 | /* |
768 | * Special case the handling of "..". | |
769 | */ | |
ebe7e575 | 770 | int |
4d55ea81 | 771 | zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp, |
ebe7e575 BB |
772 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) |
773 | { | |
0037b49e | 774 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
775 | int error = 0; |
776 | ||
0037b49e | 777 | ZFS_ENTER(zfsvfs); |
ebe7e575 BB |
778 | |
779 | if (strcmp(name, "..") == 0) { | |
780 | *ipp = dip->i_sb->s_root->d_inode; | |
781 | } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { | |
0037b49e | 782 | *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, |
ebe7e575 BB |
783 | &zpl_fops_snapdir, &zpl_ops_snapdir); |
784 | } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { | |
0037b49e | 785 | *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES, |
ebe7e575 BB |
786 | &zpl_fops_shares, &zpl_ops_shares); |
787 | } else { | |
788 | *ipp = NULL; | |
789 | } | |
790 | ||
791 | if (*ipp == NULL) | |
2e528b49 | 792 | error = SET_ERROR(ENOENT); |
ebe7e575 | 793 | |
0037b49e | 794 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
795 | |
796 | return (error); | |
797 | } | |
798 | ||
799 | /* | |
800 | * Lookup entry point for the 'snapshot' directory. Try to open the | |
801 | * snapshot if it exist, creating the pseudo filesystem inode as necessary. | |
ebe7e575 | 802 | */ |
ebe7e575 | 803 | int |
4d55ea81 | 804 | zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp, |
ebe7e575 BB |
805 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) |
806 | { | |
0037b49e | 807 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
808 | uint64_t id; |
809 | int error; | |
810 | ||
0037b49e | 811 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 812 | |
0037b49e | 813 | error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id); |
ebe7e575 | 814 | if (error) { |
0037b49e | 815 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
816 | return (error); |
817 | } | |
818 | ||
0037b49e | 819 | *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id, |
ebe7e575 | 820 | &simple_dir_operations, &simple_dir_inode_operations); |
278bee93 | 821 | if (*ipp == NULL) |
2e528b49 | 822 | error = SET_ERROR(ENOENT); |
ebe7e575 | 823 | |
0037b49e | 824 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
825 | |
826 | return (error); | |
827 | } | |
828 | ||
ebe7e575 BB |
829 | /* |
830 | * Renaming a directory under '.zfs/snapshot' will automatically trigger | |
831 | * a rename of the snapshot to the new given name. The rename is confined | |
832 | * to the '.zfs/snapshot' directory; snapshots cannot be moved elsewhere. |
833 | */ | |
ebe7e575 | 834 | int |
4d55ea81 RM |
835 | zfsctl_snapdir_rename(struct inode *sdip, const char *snm, |
836 | struct inode *tdip, const char *tnm, cred_t *cr, int flags) | |
ebe7e575 | 837 | { |
0037b49e | 838 | zfsvfs_t *zfsvfs = ITOZSB(sdip); |
13fe0198 | 839 | char *to, *from, *real, *fsname; |
ebe7e575 BB |
840 | int error; |
841 | ||
0500e835 | 842 | if (!zfs_admin_snapshot) |
ecb2b7dc | 843 | return (SET_ERROR(EACCES)); |
0500e835 | 844 | |
0037b49e | 845 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 846 | |
eca7b760 IK |
847 | to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
848 | from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); | |
849 | real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); | |
850 | fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); | |
ebe7e575 | 851 | |
0037b49e BB |
852 | if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { |
853 | error = dmu_snapshot_realname(zfsvfs->z_os, snm, real, | |
eca7b760 | 854 | ZFS_MAX_DATASET_NAME_LEN, NULL); |
ebe7e575 | 855 | if (error == 0) { |
13fe0198 | 856 | snm = real; |
ebe7e575 BB |
857 | } else if (error != ENOTSUP) { |
858 | goto out; | |
859 | } | |
860 | } | |
861 | ||
0037b49e | 862 | dmu_objset_name(zfsvfs->z_os, fsname); |
13fe0198 | 863 | |
eca7b760 IK |
864 | error = zfsctl_snapshot_name(ITOZSB(sdip), snm, |
865 | ZFS_MAX_DATASET_NAME_LEN, from); | |
13fe0198 | 866 | if (error == 0) |
eca7b760 | 867 | error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, |
02730c33 | 868 | ZFS_MAX_DATASET_NAME_LEN, to); |
13fe0198 | 869 | if (error == 0) |
ebe7e575 | 870 | error = zfs_secpolicy_rename_perms(from, to, cr); |
13fe0198 | 871 | if (error != 0) |
ebe7e575 BB |
872 | goto out; |
873 | ||
874 | /* | |
875 | * Cannot move snapshots out of the snapdir. | |
876 | */ | |
877 | if (sdip != tdip) { | |
2e528b49 | 878 | error = SET_ERROR(EINVAL); |
ebe7e575 BB |
879 | goto out; |
880 | } | |
881 | ||
882 | /* | |
883 | * No-op when names are identical. | |
884 | */ | |
13fe0198 | 885 | if (strcmp(snm, tnm) == 0) { |
ebe7e575 BB |
886 | error = 0; |
887 | goto out; | |
888 | } | |
889 | ||
5ed27c57 | 890 | rw_enter(&zfs_snapshot_lock, RW_WRITER); |
ebe7e575 | 891 | |
13fe0198 | 892 | error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); |
278bee93 BB |
893 | if (error == 0) |
894 | (void) zfsctl_snapshot_rename(snm, tnm); | |
ebe7e575 | 895 | |
5ed27c57 | 896 | rw_exit(&zfs_snapshot_lock); |
ebe7e575 | 897 | out: |
eca7b760 IK |
898 | kmem_free(from, ZFS_MAX_DATASET_NAME_LEN); |
899 | kmem_free(to, ZFS_MAX_DATASET_NAME_LEN); | |
900 | kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); | |
901 | kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); | |
ebe7e575 | 902 | |
0037b49e | 903 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
904 | |
905 | return (error); | |
906 | } | |
907 | ||
908 | /* | |
909 | * Removing a directory under '.zfs/snapshot' will automatically trigger | |
910 | * the removal of the snapshot with the given name. | |
911 | */ | |
ebe7e575 | 912 | int |
4d55ea81 RM |
913 | zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr, |
914 | int flags) | |
ebe7e575 | 915 | { |
0037b49e | 916 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
917 | char *snapname, *real; |
918 | int error; | |
919 | ||
0500e835 | 920 | if (!zfs_admin_snapshot) |
ecb2b7dc | 921 | return (SET_ERROR(EACCES)); |
0500e835 | 922 | |
0037b49e | 923 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 924 | |
eca7b760 IK |
925 | snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
926 | real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); | |
ebe7e575 | 927 | |
0037b49e BB |
928 | if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { |
929 | error = dmu_snapshot_realname(zfsvfs->z_os, name, real, | |
eca7b760 | 930 | ZFS_MAX_DATASET_NAME_LEN, NULL); |
ebe7e575 BB |
931 | if (error == 0) { |
932 | name = real; | |
933 | } else if (error != ENOTSUP) { | |
934 | goto out; | |
935 | } | |
936 | } | |
937 | ||
eca7b760 IK |
938 | error = zfsctl_snapshot_name(ITOZSB(dip), name, |
939 | ZFS_MAX_DATASET_NAME_LEN, snapname); | |
13fe0198 | 940 | if (error == 0) |
ebe7e575 | 941 | error = zfs_secpolicy_destroy_perms(snapname, cr); |
13fe0198 | 942 | if (error != 0) |
ebe7e575 BB |
943 | goto out; |
944 | ||
278bee93 | 945 | error = zfsctl_snapshot_unmount(snapname, MNT_FORCE); |
ebe7e575 | 946 | if ((error == 0) || (error == ENOENT)) |
13fe0198 | 947 | error = dsl_destroy_snapshot(snapname, B_FALSE); |
ebe7e575 | 948 | out: |
eca7b760 IK |
949 | kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); |
950 | kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); | |
ebe7e575 | 951 | |
0037b49e | 952 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
953 | |
954 | return (error); | |
955 | } | |
956 | ||
957 | /* | |
958 | * Creating a directory under '.zfs/snapshot' will automatically trigger | |
959 | * the creation of a new snapshot with the given name. | |
960 | */ | |
ebe7e575 | 961 | int |
4d55ea81 | 962 | zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname, vattr_t *vap, |
4ea3f864 | 963 | struct inode **ipp, cred_t *cr, int flags) |
ebe7e575 | 964 | { |
0037b49e | 965 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
ebe7e575 BB |
966 | char *dsname; |
967 | int error; | |
968 | ||
0500e835 | 969 | if (!zfs_admin_snapshot) |
ecb2b7dc | 970 | return (SET_ERROR(EACCES)); |
0500e835 | 971 | |
eca7b760 | 972 | dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
ebe7e575 | 973 | |
da536844 | 974 | if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { |
2e528b49 | 975 | error = SET_ERROR(EILSEQ); |
ebe7e575 BB |
976 | goto out; |
977 | } | |
978 | ||
0037b49e | 979 | dmu_objset_name(zfsvfs->z_os, dsname); |
ebe7e575 BB |
980 | |
981 | error = zfs_secpolicy_snapshot_perms(dsname, cr); | |
13fe0198 | 982 | if (error != 0) |
ebe7e575 BB |
983 | goto out; |
984 | ||
985 | if (error == 0) { | |
6f1ffb06 | 986 | error = dmu_objset_snapshot_one(dsname, dirname); |
13fe0198 | 987 | if (error != 0) |
ebe7e575 BB |
988 | goto out; |
989 | ||
990 | error = zfsctl_snapdir_lookup(dip, dirname, ipp, | |
991 | 0, cr, NULL, NULL); | |
992 | } | |
993 | out: | |
eca7b760 | 994 | kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); |
ebe7e575 BB |
995 | |
996 | return (error); | |
997 | } | |
998 | ||
b9007997 YY |
999 | /* |
1000 | * Flush everything out of the kernel's export table and such. | |
1001 | * This is needed as once the snapshot is used over NFS, its | |
1002 | * entries in svc_export and svc_expkey caches hold reference | |
1003 | * to the snapshot mount point. There is no known way of flushing | |
1004 | * only the entries related to the snapshot. | |
1005 | */ | |
1006 | static void | |
1007 | exportfs_flush(void) | |
1008 | { | |
1009 | char *argv[] = { "/usr/sbin/exportfs", "-f", NULL }; | |
1010 | char *envp[] = { NULL }; | |
1011 | ||
1012 | (void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); | |
1013 | } | |
1014 | ||
ebe7e575 BB |
1015 | /* |
1016 | * Attempt to unmount a snapshot by making a call to user space. | |
1017 | * There is no assurance that this can or will succeed, it is just a |
1018 | * best effort. In the case where it does fail, perhaps because | |
1019 | * it's in use, the unmount will fail harmlessly. | |
1020 | */ | |
278bee93 | 1021 | int |
4d55ea81 | 1022 | zfsctl_snapshot_unmount(const char *snapname, int flags) |
ebe7e575 | 1023 | { |
5dc1ff29 SE |
1024 | char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL, |
1025 | NULL }; | |
ebe7e575 | 1026 | char *envp[] = { NULL }; |
278bee93 | 1027 | zfs_snapentry_t *se; |
ebe7e575 BB |
1028 | int error; |
1029 | ||
5ed27c57 | 1030 | rw_enter(&zfs_snapshot_lock, RW_READER); |
278bee93 | 1031 | if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) { |
5ed27c57 | 1032 | rw_exit(&zfs_snapshot_lock); |
ecb2b7dc | 1033 | return (SET_ERROR(ENOENT)); |
278bee93 | 1034 | } |
5ed27c57 | 1035 | rw_exit(&zfs_snapshot_lock); |
278bee93 | 1036 | |
b9007997 YY |
1037 | exportfs_flush(); |
1038 | ||
5dc1ff29 SE |
1039 | if (flags & MNT_FORCE) |
1040 | argv[4] = "-fn"; | |
1041 | argv[5] = se->se_path; | |
278bee93 | 1042 | dprintf("unmount; path=%s\n", se->se_path); |
761394b3 | 1043 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
00b65db7 | 1044 | zfsctl_snapshot_rele(se); |
ebe7e575 | 1045 | |
278bee93 | 1046 | |
ebe7e575 BB |
1047 | /* |
1048 | * The umount system utility will return 256 on error. We must | |
1049 | * assume this error is because the file system is busy so it is | |
1050 | * converted to the more sensible EBUSY. | |
1051 | */ | |
1052 | if (error) | |
2e528b49 | 1053 | error = SET_ERROR(EBUSY); |
ebe7e575 | 1054 | |
ebe7e575 BB |
1055 | return (error); |
1056 | } | |
1057 | ||
ebe7e575 | 1058 | int |
278bee93 | 1059 | zfsctl_snapshot_mount(struct path *path, int flags) |
ebe7e575 BB |
1060 | { |
1061 | struct dentry *dentry = path->dentry; | |
1062 | struct inode *ip = dentry->d_inode; | |
0037b49e BB |
1063 | zfsvfs_t *zfsvfs; |
1064 | zfsvfs_t *snap_zfsvfs; | |
278bee93 | 1065 | zfs_snapentry_t *se; |
ebe7e575 | 1066 | char *full_name, *full_path; |
5dc1ff29 SE |
1067 | char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL, |
1068 | NULL }; | |
ebe7e575 BB |
1069 | char *envp[] = { NULL }; |
1070 | int error; | |
d287880a | 1071 | struct path spath; |
ebe7e575 | 1072 | |
278bee93 | 1073 | if (ip == NULL) |
ecb2b7dc | 1074 | return (SET_ERROR(EISDIR)); |
278bee93 | 1075 | |
0037b49e BB |
1076 | zfsvfs = ITOZSB(ip); |
1077 | ZFS_ENTER(zfsvfs); | |
ebe7e575 | 1078 | |
eca7b760 | 1079 | full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); |
278bee93 | 1080 | full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); |
ebe7e575 | 1081 | |
0037b49e | 1082 | error = zfsctl_snapshot_name(zfsvfs, dname(dentry), |
eca7b760 | 1083 | ZFS_MAX_DATASET_NAME_LEN, full_name); |
ebe7e575 BB |
1084 | if (error) |
1085 | goto error; | |
1086 | ||
df358db7 TK |
1087 | /* |
1088 | * Construct a mount point path from sb of the ctldir inode and dirent | |
1089 | * name, instead of from d_path(), so that chroot'd process doesn't fail | |
1090 | * on mount.zfs(8). | |
1091 | */ | |
1092 | snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", | |
6bd4f454 BB |
1093 | zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "", |
1094 | dname(dentry)); | |
ebe7e575 | 1095 | |
278bee93 BB |
1096 | /* |
1097 | * Multiple concurrent automounts of a snapshot are never allowed. | |
1098 | * The snapshot may be manually mounted as many times as desired. | |
1099 | */ | |
1100 | if (zfsctl_snapshot_ismounted(full_name)) { | |
19976601 | 1101 | error = 0; |
278bee93 BB |
1102 | goto error; |
1103 | } | |
1104 | ||
ebe7e575 BB |
1105 | /* |
1106 | * Attempt to mount the snapshot from user space. Normally this | |
1107 | * would be done using the vfs_kern_mount() function, however that | |
1108 | * function is marked GPL-only and cannot be used. On error we | |
1109 | * careful to log the real error to the console and return EISDIR | |
1110 | * to safely abort the automount. This should be very rare. | |
fd4f7616 TC |
1111 | * |
1112 | * If the user mode helper happens to return EBUSY, a concurrent | |
1113 | * mount is already in progress in which case the error is ignored. | |
1114 | * Take note that if the program was executed successfully the return | |
1115 | * value from call_usermodehelper() will be (exitcode << 8 + signal). | |
ebe7e575 | 1116 | */ |
278bee93 | 1117 | dprintf("mount; name=%s path=%s\n", full_name, full_path); |
5dc1ff29 SE |
1118 | argv[5] = full_name; |
1119 | argv[6] = full_path; | |
761394b3 | 1120 | error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); |
d287880a CC |
1121 | if (error) { |
1122 | if (!(error & MOUNT_BUSY << 8)) { | |
6dbca94f PZ |
1123 | zfs_dbgmsg("Unable to automount %s error=%d", |
1124 | full_path, error); | |
d287880a CC |
1125 | error = SET_ERROR(EISDIR); |
1126 | } else { | |
1127 | /* | |
1128 | * EBUSY, this could mean a concurrent mount, or the | |
1129 | * snapshot has already been mounted at completely | |
1130 | * different place. We return 0 so VFS will retry. For | |
1131 | * the latter case the VFS will retry several times | |
1132 | * and return ELOOP, which is probably not a very good | |
1133 | * behavior. | |
1134 | */ | |
1135 | error = 0; | |
1136 | } | |
ebe7e575 BB |
1137 | goto error; |
1138 | } | |
1139 | ||
ebe7e575 | 1140 | /* |
278bee93 BB |
1141 | * Follow down in to the mounted snapshot and set MNT_SHRINKABLE |
1142 | * to identify this as an automounted filesystem. | |
ebe7e575 | 1143 | */ |
d287880a CC |
1144 | spath = *path; |
1145 | path_get(&spath); | |
066e8252 | 1146 | if (follow_down_one(&spath)) { |
0037b49e BB |
1147 | snap_zfsvfs = ITOZSB(spath.dentry->d_inode); |
1148 | snap_zfsvfs->z_parent = zfsvfs; | |
d287880a CC |
1149 | dentry = spath.dentry; |
1150 | spath.mnt->mnt_flags |= MNT_SHRINKABLE; | |
ebe7e575 | 1151 | |
5ed27c57 | 1152 | rw_enter(&zfs_snapshot_lock, RW_WRITER); |
d287880a | 1153 | se = zfsctl_snapshot_alloc(full_name, full_path, |
0037b49e | 1154 | snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os), |
24ef51f6 | 1155 | dentry); |
d287880a CC |
1156 | zfsctl_snapshot_add(se); |
1157 | zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); | |
5ed27c57 | 1158 | rw_exit(&zfs_snapshot_lock); |
d287880a CC |
1159 | } |
1160 | path_put(&spath); | |
ebe7e575 | 1161 | error: |
eca7b760 | 1162 | kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); |
278bee93 | 1163 | kmem_free(full_path, MAXPATHLEN); |
ebe7e575 | 1164 | |
0037b49e | 1165 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
1166 | |
1167 | return (error); | |
1168 | } | |
1169 | ||
1170 | /* | |
9b77d1c9 | 1171 | * Get the snapdir inode from fid |
ebe7e575 | 1172 | */ |
ebe7e575 | 1173 | int |
9b77d1c9 CC |
1174 | zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen, |
1175 | struct inode **ipp) | |
ebe7e575 | 1176 | { |
ebe7e575 | 1177 | int error; |
9b77d1c9 CC |
1178 | struct path path; |
1179 | char *mnt; | |
1180 | struct dentry *dentry; | |
d4787d55 | 1181 | |
9b77d1c9 | 1182 | mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP); |
d4787d55 | 1183 | |
9b77d1c9 CC |
1184 | error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid, |
1185 | MAXPATHLEN, mnt); | |
1186 | if (error) | |
1187 | goto out; | |
d4787d55 | 1188 | |
9b77d1c9 | 1189 | /* Trigger automount */ |
cfa37548 | 1190 | error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); |
9b77d1c9 CC |
1191 | if (error) |
1192 | goto out; | |
ebe7e575 | 1193 | |
9b77d1c9 | 1194 | path_put(&path); |
0500e835 | 1195 | /* |
9b77d1c9 CC |
1196 | * Get the snapdir inode. Note, we don't want to use the above |
1197 | * path because it contains the root of the snapshot rather | |
1198 | * than the snapdir. | |
0500e835 | 1199 | */ |
9b77d1c9 CC |
1200 | *ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid); |
1201 | if (*ipp == NULL) { | |
1202 | error = SET_ERROR(ENOENT); | |
1203 | goto out; | |
0500e835 BB |
1204 | } |
1205 | ||
9b77d1c9 CC |
1206 | /* check gen, see zfsctl_snapdir_fid */ |
1207 | dentry = d_obtain_alias(igrab(*ipp)); | |
1208 | if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) { | |
1209 | iput(*ipp); | |
1210 | *ipp = NULL; | |
1211 | error = SET_ERROR(ENOENT); | |
1212 | } | |
1213 | if (!IS_ERR(dentry)) | |
1214 | dput(dentry); | |
1215 | out: | |
1216 | kmem_free(mnt, MAXPATHLEN); | |
ebe7e575 BB |
1217 | return (error); |
1218 | } | |
1219 | ||
ebe7e575 BB |
1220 | int |
1221 | zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, | |
1222 | int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) | |
1223 | { | |
0037b49e | 1224 | zfsvfs_t *zfsvfs = ITOZSB(dip); |
657ce253 | 1225 | znode_t *zp; |
ebe7e575 BB |
1226 | znode_t *dzp; |
1227 | int error; | |
1228 | ||
0037b49e | 1229 | ZFS_ENTER(zfsvfs); |
ebe7e575 | 1230 | |
0037b49e BB |
1231 | if (zfsvfs->z_shares_dir == 0) { |
1232 | ZFS_EXIT(zfsvfs); | |
2e528b49 | 1233 | return (SET_ERROR(ENOTSUP)); |
ebe7e575 BB |
1234 | } |
1235 | ||
0037b49e | 1236 | if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { |
657ce253 MM |
1237 | error = zfs_lookup(dzp, name, &zp, 0, cr, NULL, NULL); |
1238 | zrele(dzp); | |
ebe7e575 BB |
1239 | } |
1240 | ||
0037b49e | 1241 | ZFS_EXIT(zfsvfs); |
ebe7e575 BB |
1242 | |
1243 | return (error); | |
1244 | } | |
1245 | ||
ebe7e575 BB |
1246 | /* |
1247 | * Initialize the various pieces we'll need to create and manipulate .zfs | |
1248 | * directories. Currently this is unused but available. | |
1249 | */ | |
1250 | void | |
1251 | zfsctl_init(void) | |
1252 | { | |
278bee93 BB |
1253 | avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name, |
1254 | sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, | |
1255 | se_node_name)); | |
1256 | avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid, | |
1257 | sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, | |
1258 | se_node_objsetid)); | |
5ed27c57 | 1259 | rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL); |
ebe7e575 BB |
1260 | } |
1261 | ||
1262 | /* | |
1263 | * Cleanup the various pieces we needed for .zfs directories. In particular | |
1264 | * ensure the expiry timer is canceled safely. | |
1265 | */ | |
1266 | void | |
1267 | zfsctl_fini(void) | |
1268 | { | |
278bee93 BB |
1269 | avl_destroy(&zfs_snapshots_by_name); |
1270 | avl_destroy(&zfs_snapshots_by_objsetid); | |
5ed27c57 | 1271 | rw_destroy(&zfs_snapshot_lock); |
ebe7e575 BB |
1272 | } |
1273 | ||
0500e835 BB |
1274 | module_param(zfs_admin_snapshot, int, 0644); |
1275 | MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot"); | |
1276 | ||
ebe7e575 BB |
1277 | module_param(zfs_expire_snapshot, int, 0644); |
1278 | MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); |