/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 * Copyright (c) 2013 Martin Matuska. All rights reserved.
 * Copyright (c) 2014 Joyent, Inc. All rights reserved.
 * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_deleg.h>
#include <sys/dmu_impl.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/metaslab.h>
#include <sys/zap.h>
#include <sys/zio.h>
#include <sys/arc.h>
#include <sys/sunddi.h>
#include <sys/zfeature.h>
#include <sys/policy.h>
#include <sys/zfs_znode.h>
#include <sys/zvol.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"

/*
 * Filesystem and Snapshot Limits
 * ------------------------------
 *
 * These limits are used to restrict the number of filesystems and/or snapshots
 * that can be created at a given level in the tree or below. A typical
 * use-case is with a delegated dataset where the administrator wants to ensure
 * that a user within the zone is not creating too many additional filesystems
 * or snapshots, even though they're not exceeding their space quota.
 *
 * The filesystem and snapshot counts are stored as extensible properties. This
 * capability is controlled by a feature flag and must be enabled to be used.
 * Once enabled, the feature is not active until the first limit is set. At
 * that point, future operations to create/destroy filesystems or snapshots
 * will validate and update the counts.
 *
 * Because the count properties will not exist before the feature is active,
 * the counts are updated when a limit is first set on an uninitialized
 * dsl_dir node in the tree (The filesystem/snapshot count on a node includes
 * all of the nested filesystems/snapshots. Thus, a new leaf node has a
 * filesystem count of 0 and a snapshot count of 0. Non-existent filesystem and
 * snapshot count properties on a node indicate uninitialized counts on that
 * node.) When first setting a limit on an uninitialized node, the code starts
 * at the filesystem with the new limit and descends into all sub-filesystems
 * to add the count properties.
 *
 * In practice this is lightweight since a limit is typically set when the
 * filesystem is created and thus has no children. Once valid, changing the
 * limit value won't require a re-traversal since the counts are already valid.
 * When recursively fixing the counts, if a node with a limit is encountered
 * during the descent, the counts are known to be valid and there is no need to
 * descend into that filesystem's children. The counts on filesystems above the
 * one with the new limit will still be uninitialized, unless a limit is
 * eventually set on one of those filesystems. The counts are always recursively
 * updated when a limit is set on a dataset, unless there is already a limit.
 * When a new limit value is set on a filesystem with an existing limit, it is
 * possible for the new limit to be less than the current count at that level
 * since a user who can change the limit is also allowed to exceed the limit.
 *
 * Once the feature is active, then whenever a filesystem or snapshot is
 * created, the code recurses up the tree, validating the new count against the
 * limit at each initialized level. In practice, most levels will not have a
 * limit set. If there is a limit at any initialized level up the tree, the
 * check must pass or the creation will fail. Likewise, when a filesystem or
 * snapshot is destroyed, the counts are recursively adjusted all the way up
 * the initialized nodes in the tree. Renaming a filesystem into a different
 * point in the tree will first validate, then update the counts on each branch
 * up to the common ancestor. A receive will also validate the counts and then
 * update them.
 *
 * An exception to the above behavior is that the limit is not enforced if the
 * user has permission to modify the limit. This is primarily so that
 * recursive snapshots in the global zone always work. We want to prevent a
 * denial-of-service in which a lower level delegated dataset could max out its
 * limit and thus block recursive snapshots from being taken in the global zone.
 * Because of this, it is possible for the snapshot count to be over the limit
 * and snapshots taken in the global zone could cause a lower level dataset to
 * hit or exceed its limit. The administrator taking the global zone recursive
 * snapshot should be aware of this side-effect and behave accordingly.
 * For consistency, the filesystem limit is also not enforced if the user can
 * modify the limit.
 *
 * The filesystem and snapshot limits are validated by dsl_fs_ss_limit_check()
 * and updated by dsl_fs_ss_count_adjust(). A new limit value is set up in
 * dsl_dir_activate_fs_ss_limit() and the counts are adjusted, if necessary, by
 * dsl_dir_init_fs_ss_count().
 *
 * There is a special case when we receive a filesystem that already exists. In
 * this case a temporary clone name of %X is created (see dmu_recv_begin). We
 * never update the filesystem counts for temporary clones.
 *
 * Likewise, we do not update the snapshot counts for temporary snapshots,
 * such as those created by zfs diff.
 */
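
/*
 * Illustrative walk (hypothetical dataset names): with the feature active and
 * snapshot_limit set on pool/a, creating pool/a/b@snap recurses upward from
 * pool/a/b, comparing the cumulative snapshot count plus the new snapshot
 * against any limit found at each initialized level (EDQUOT if exceeded,
 * unless the exception above applies), and stops once it reaches an ancestor
 * whose counts are still uninitialized.
 */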

extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);

static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);

static void
dsl_dir_evict_async(void *dbu)
{
	dsl_dir_t *dd = dbu;
	int t;
	ASSERTV(dsl_pool_t *dp = dd->dd_pool);

	dd->dd_dbuf = NULL;

	for (t = 0; t < TXG_SIZE; t++) {
		ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
		ASSERT(dd->dd_tempreserved[t] == 0);
		ASSERT(dd->dd_space_towrite[t] == 0);
	}

	if (dd->dd_parent)
		dsl_dir_async_rele(dd->dd_parent, dd);

	spa_async_close(dd->dd_pool->dp_spa, dd);

	dsl_prop_fini(dd);
	mutex_destroy(&dd->dd_lock);
	kmem_free(dd, sizeof (dsl_dir_t));
}

int
dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
    const char *tail, void *tag, dsl_dir_t **ddp)
{
	dmu_buf_t *dbuf;
	dsl_dir_t *dd;
	dmu_object_info_t doi;
	int err;

	ASSERT(dsl_pool_config_held(dp));

	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
	if (err != 0)
		return (err);
	dd = dmu_buf_get_user(dbuf);

	dmu_object_info_from_db(dbuf, &doi);
	ASSERT3U(doi.doi_bonus_type, ==, DMU_OT_DSL_DIR);
	ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));

	if (dd == NULL) {
		dsl_dir_t *winner;

		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
		dd->dd_object = ddobj;
		dd->dd_dbuf = dbuf;
		dd->dd_pool = dp;

		if (dsl_dir_is_zapified(dd) &&
		    zap_contains(dp->dp_meta_objset, ddobj,
		    DD_FIELD_CRYPTO_KEY_OBJ) == 0) {
			VERIFY0(zap_lookup(dp->dp_meta_objset,
			    ddobj, DD_FIELD_CRYPTO_KEY_OBJ,
			    sizeof (uint64_t), 1, &dd->dd_crypto_obj));

			/* check for on-disk format errata */
			if (dsl_dir_incompatible_encryption_version(dd)) {
				dp->dp_spa->spa_errata =
				    ZPOOL_ERRATA_ZOL_6845_ENCRYPTION;
			}
		}

		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
		dsl_prop_init(dd);

		dsl_dir_snap_cmtime_update(dd);

		if (dsl_dir_phys(dd)->dd_parent_obj) {
			err = dsl_dir_hold_obj(dp,
			    dsl_dir_phys(dd)->dd_parent_obj, NULL, dd,
			    &dd->dd_parent);
			if (err != 0)
				goto errout;
			if (tail) {
#ifdef ZFS_DEBUG
				uint64_t foundobj;

				err = zap_lookup(dp->dp_meta_objset,
				    dsl_dir_phys(dd->dd_parent)->
				    dd_child_dir_zapobj, tail,
				    sizeof (foundobj), 1, &foundobj);
				ASSERT(err || foundobj == ddobj);
#endif
				(void) strlcpy(dd->dd_myname, tail,
				    sizeof (dd->dd_myname));
			} else {
				err = zap_value_search(dp->dp_meta_objset,
				    dsl_dir_phys(dd->dd_parent)->
				    dd_child_dir_zapobj,
				    ddobj, 0, dd->dd_myname);
			}
			if (err != 0)
				goto errout;
		} else {
			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
		}

		if (dsl_dir_is_clone(dd)) {
			dmu_buf_t *origin_bonus;
			dsl_dataset_phys_t *origin_phys;

			/*
			 * We can't open the origin dataset, because
			 * that would require opening this dsl_dir.
			 * Just look at its phys directly instead.
			 */
			err = dmu_bonus_hold(dp->dp_meta_objset,
			    dsl_dir_phys(dd)->dd_origin_obj, FTAG,
			    &origin_bonus);
			if (err != 0)
				goto errout;
			origin_phys = origin_bonus->db_data;
			dd->dd_origin_txg =
			    origin_phys->ds_creation_txg;
			dmu_buf_rele(origin_bonus, FTAG);
		}

		dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
		    &dd->dd_dbuf);
		winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
		if (winner != NULL) {
			if (dd->dd_parent)
				dsl_dir_rele(dd->dd_parent, dd);
			dsl_prop_fini(dd);
			mutex_destroy(&dd->dd_lock);
			kmem_free(dd, sizeof (dsl_dir_t));
			dd = winner;
		} else {
			spa_open_ref(dp->dp_spa, dd);
		}
	}

	/*
	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
	 * holds on the spa. We need the open-to-close holds because
	 * otherwise the spa_refcnt wouldn't change when we open a
	 * dir which the spa also has open, so we could incorrectly
	 * think it was OK to unload/export/destroy the pool. We need
	 * the instantiate-to-evict hold because the dsl_dir_t has a
	 * pointer to the dd_pool, which has a pointer to the spa_t.
	 */
	spa_open_ref(dp->dp_spa, tag);
	ASSERT3P(dd->dd_pool, ==, dp);
	ASSERT3U(dd->dd_object, ==, ddobj);
	ASSERT3P(dd->dd_dbuf, ==, dbuf);
	*ddp = dd;
	return (0);

errout:
	if (dd->dd_parent)
		dsl_dir_rele(dd->dd_parent, dd);
	dsl_prop_fini(dd);
	mutex_destroy(&dd->dd_lock);
	kmem_free(dd, sizeof (dsl_dir_t));
	dmu_buf_rele(dbuf, tag);
	return (err);
}

void
dsl_dir_rele(dsl_dir_t *dd, void *tag)
{
	dprintf_dd(dd, "%s\n", "");
	spa_close(dd->dd_pool->dp_spa, tag);
	dmu_buf_rele(dd->dd_dbuf, tag);
}

/*
 * Remove a reference to the given dsl dir that is being asynchronously
 * released. Async releases occur from a taskq performing eviction of
 * dsl datasets and dirs. This process is identical to a normal release
 * with the exception of using the async API for releasing the reference on
 * the spa.
 */
void
dsl_dir_async_rele(dsl_dir_t *dd, void *tag)
{
	dprintf_dd(dd, "%s\n", "");
	spa_async_close(dd->dd_pool->dp_spa, tag);
	dmu_buf_rele(dd->dd_dbuf, tag);
}

/* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */
void
dsl_dir_name(dsl_dir_t *dd, char *buf)
{
	if (dd->dd_parent) {
		dsl_dir_name(dd->dd_parent, buf);
		VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <,
		    ZFS_MAX_DATASET_NAME_LEN);
	} else {
		buf[0] = '\0';
	}
	if (!MUTEX_HELD(&dd->dd_lock)) {
		/*
		 * recursive mutex so that we can use
		 * dprintf_dd() with dd_lock held
		 */
		mutex_enter(&dd->dd_lock);
		VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
		    <, ZFS_MAX_DATASET_NAME_LEN);
		mutex_exit(&dd->dd_lock);
	} else {
		VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
		    <, ZFS_MAX_DATASET_NAME_LEN);
	}
}

/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
int
dsl_dir_namelen(dsl_dir_t *dd)
{
	int result = 0;

	if (dd->dd_parent) {
		/* parent's name + 1 for the "/" */
		result = dsl_dir_namelen(dd->dd_parent) + 1;
	}

	if (!MUTEX_HELD(&dd->dd_lock)) {
		/* see dsl_dir_name */
		mutex_enter(&dd->dd_lock);
		result += strlen(dd->dd_myname);
		mutex_exit(&dd->dd_lock);
	} else {
		result += strlen(dd->dd_myname);
	}

	return (result);
}

static int
getcomponent(const char *path, char *component, const char **nextp)
{
	char *p;

	if ((path == NULL) || (path[0] == '\0'))
		return (SET_ERROR(ENOENT));
	/* This would be a good place to reserve some namespace... */
	p = strpbrk(path, "/@");
	if (p && (p[1] == '/' || p[1] == '@')) {
		/* two separators in a row */
		return (SET_ERROR(EINVAL));
	}
	if (p == NULL || p == path) {
		/*
		 * if the first thing is an @ or /, it had better be an
		 * @ and it had better not have any more ats or slashes,
		 * and it had better have something after the @.
		 */
		if (p != NULL &&
		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
			return (SET_ERROR(EINVAL));
		if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN)
			return (SET_ERROR(ENAMETOOLONG));
		(void) strcpy(component, path);
		p = NULL;
	} else if (p[0] == '/') {
		if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
			return (SET_ERROR(ENAMETOOLONG));
		(void) strncpy(component, path, p - path);
		component[p - path] = '\0';
		p++;
	} else if (p[0] == '@') {
		/*
		 * if the next separator is an @, there better not be
		 * any more slashes.
		 */
		if (strchr(path, '/'))
			return (SET_ERROR(EINVAL));
		if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
			return (SET_ERROR(ENAMETOOLONG));
		(void) strncpy(component, path, p - path);
		component[p - path] = '\0';
	} else {
		panic("invalid p=%p", (void *)p);
	}
	*nextp = p;
	return (0);
}
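
/*
 * For example (hypothetical name): getcomponent("pool/fs@snap", buf, &next)
 * copies "pool" into buf and leaves next pointing at "fs@snap"; a second call
 * on "fs@snap" copies "fs" and leaves next at "@snap", whose leading '@'
 * marks the remainder as a snapshot name.
 */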

/*
 * Return the dsl_dir_t, and possibly the last component which couldn't
 * be found in *tail. The name must be in the specified dsl_pool_t. This
 * thread must hold the dp_config_rwlock for the pool. Returns NULL if the
 * path is bogus, or if tail==NULL and we couldn't parse the whole name.
 * (*tail)[0] == '@' means that the last component is a snapshot.
 */
int
dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
    dsl_dir_t **ddp, const char **tailp)
{
	char *buf;
	const char *spaname, *next, *nextnext = NULL;
	int err;
	dsl_dir_t *dd;
	uint64_t ddobj;

	buf = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
	err = getcomponent(name, buf, &next);
	if (err != 0)
		goto error;

	/* Make sure the name is in the specified pool. */
	spaname = spa_name(dp->dp_spa);
	if (strcmp(buf, spaname) != 0) {
		err = SET_ERROR(EXDEV);
		goto error;
	}

	ASSERT(dsl_pool_config_held(dp));

	err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
	if (err != 0) {
		goto error;
	}

	while (next != NULL) {
		dsl_dir_t *child_dd;
		err = getcomponent(next, buf, &nextnext);
		if (err != 0)
			break;
		ASSERT(next[0] != '\0');
		if (next[0] == '@')
			break;
		dprintf("looking up %s in obj%lld\n",
		    buf, dsl_dir_phys(dd)->dd_child_dir_zapobj);

		err = zap_lookup(dp->dp_meta_objset,
		    dsl_dir_phys(dd)->dd_child_dir_zapobj,
		    buf, sizeof (ddobj), 1, &ddobj);
		if (err != 0) {
			if (err == ENOENT)
				err = 0;
			break;
		}

		err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_dd);
		if (err != 0)
			break;
		dsl_dir_rele(dd, tag);
		dd = child_dd;
		next = nextnext;
	}

	if (err != 0) {
		dsl_dir_rele(dd, tag);
		goto error;
	}

	/*
	 * It's an error if there's more than one component left, or
	 * tailp==NULL and there's any component left.
	 */
	if (next != NULL &&
	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
		/* bad path name */
		dsl_dir_rele(dd, tag);
		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
		err = SET_ERROR(ENOENT);
	}
	if (tailp != NULL)
		*tailp = next;
	*ddp = dd;
error:
	kmem_free(buf, ZFS_MAX_DATASET_NAME_LEN);
	return (err);
}

/*
 * If the counts are already initialized for this filesystem and its
 * descendants then do nothing, otherwise initialize the counts.
 *
 * The counts on this filesystem, and those below, may be uninitialized due to
 * either the use of a pre-existing pool which did not support the
 * filesystem/snapshot limit feature, or one in which the feature had not yet
 * been enabled.
 *
 * Recursively descend the filesystem tree and update the filesystem/snapshot
 * counts on each filesystem below, then update the cumulative count on the
 * current filesystem. If the filesystem already has a count set on it,
 * then we know that its counts, and the counts on the filesystems below it,
 * are already correct, so we don't have to update this filesystem.
 */
static void
dsl_dir_init_fs_ss_count(dsl_dir_t *dd, dmu_tx_t *tx)
{
	uint64_t my_fs_cnt = 0;
	uint64_t my_ss_cnt = 0;
	dsl_pool_t *dp = dd->dd_pool;
	objset_t *os = dp->dp_meta_objset;
	zap_cursor_t *zc;
	zap_attribute_t *za;
	dsl_dataset_t *ds;

	ASSERT(spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT));
	ASSERT(dsl_pool_config_held(dp));
	ASSERT(dmu_tx_is_syncing(tx));

	dsl_dir_zapify(dd, tx);

	/*
	 * If the filesystem count has already been initialized then we
	 * don't need to recurse down any further.
	 */
	if (zap_contains(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT) == 0)
		return;

	zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
	za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

	/* Iterate my child dirs */
	for (zap_cursor_init(zc, os, dsl_dir_phys(dd)->dd_child_dir_zapobj);
	    zap_cursor_retrieve(zc, za) == 0; zap_cursor_advance(zc)) {
		dsl_dir_t *chld_dd;
		uint64_t count;

		VERIFY0(dsl_dir_hold_obj(dp, za->za_first_integer, NULL, FTAG,
		    &chld_dd));

		/*
		 * Ignore hidden ($FREE, $MOS & $ORIGIN) objsets and
		 * temporary datasets.
		 */
		if (chld_dd->dd_myname[0] == '$' ||
		    chld_dd->dd_myname[0] == '%') {
			dsl_dir_rele(chld_dd, FTAG);
			continue;
		}

		my_fs_cnt++;	/* count this child */

		dsl_dir_init_fs_ss_count(chld_dd, tx);

		VERIFY0(zap_lookup(os, chld_dd->dd_object,
		    DD_FIELD_FILESYSTEM_COUNT, sizeof (count), 1, &count));
		my_fs_cnt += count;
		VERIFY0(zap_lookup(os, chld_dd->dd_object,
		    DD_FIELD_SNAPSHOT_COUNT, sizeof (count), 1, &count));
		my_ss_cnt += count;

		dsl_dir_rele(chld_dd, FTAG);
	}
	zap_cursor_fini(zc);
	/* Count my snapshots (we counted children's snapshots above) */
	VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
	    dsl_dir_phys(dd)->dd_head_dataset_obj, FTAG, &ds));

	for (zap_cursor_init(zc, os, dsl_dataset_phys(ds)->ds_snapnames_zapobj);
	    zap_cursor_retrieve(zc, za) == 0;
	    zap_cursor_advance(zc)) {
		/* Don't count temporary snapshots */
		if (za->za_name[0] != '%')
			my_ss_cnt++;
	}
	zap_cursor_fini(zc);

	dsl_dataset_rele(ds, FTAG);

	kmem_free(zc, sizeof (zap_cursor_t));
	kmem_free(za, sizeof (zap_attribute_t));

	/* we're in a sync task, update counts */
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
	    sizeof (my_fs_cnt), 1, &my_fs_cnt, tx));
	VERIFY0(zap_add(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
	    sizeof (my_ss_cnt), 1, &my_ss_cnt, tx));
}

static int
dsl_dir_actv_fs_ss_limit_check(void *arg, dmu_tx_t *tx)
{
	char *ddname = (char *)arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	int error;

	error = dsl_dataset_hold(dp, ddname, FTAG, &ds);
	if (error != 0)
		return (error);

	if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT)) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(ENOTSUP));
	}

	dd = ds->ds_dir;
	if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_FS_SS_LIMIT) &&
	    dsl_dir_is_zapified(dd) &&
	    zap_contains(dp->dp_meta_objset, dd->dd_object,
	    DD_FIELD_FILESYSTEM_COUNT) == 0) {
		dsl_dataset_rele(ds, FTAG);
		return (SET_ERROR(EALREADY));
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}

static void
dsl_dir_actv_fs_ss_limit_sync(void *arg, dmu_tx_t *tx)
{
	char *ddname = (char *)arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	spa_t *spa;

	VERIFY0(dsl_dataset_hold(dp, ddname, FTAG, &ds));

	spa = dsl_dataset_get_spa(ds);

	if (!spa_feature_is_active(spa, SPA_FEATURE_FS_SS_LIMIT)) {
		/*
		 * Since the feature was not active and we're now setting a
		 * limit, increment the feature-active counter so that the
		 * feature becomes active for the first time.
		 *
		 * We are already in a sync task so we can update the MOS.
		 */
		spa_feature_incr(spa, SPA_FEATURE_FS_SS_LIMIT, tx);
	}

	/*
	 * Since we are now setting a non-UINT64_MAX limit on the filesystem,
	 * we need to ensure the counts are correct. Descend down the tree from
	 * this point and update all of the counts to be accurate.
	 */
	dsl_dir_init_fs_ss_count(ds->ds_dir, tx);

	dsl_dataset_rele(ds, FTAG);
}

/*
 * Make sure the feature is enabled and activate it if necessary.
 * Since we're setting a limit, ensure the on-disk counts are valid.
 * This is only called by the ioctl path when setting a limit value.
 *
 * We do not need to validate the new limit, since users who can change the
 * limit are also allowed to exceed the limit.
 */
int
dsl_dir_activate_fs_ss_limit(const char *ddname)
{
	int error;

	error = dsl_sync_task(ddname, dsl_dir_actv_fs_ss_limit_check,
	    dsl_dir_actv_fs_ss_limit_sync, (void *)ddname, 0,
	    ZFS_SPACE_CHECK_RESERVED);

	if (error == EALREADY)
		error = 0;

	return (error);
}

/*
 * Used to determine if the filesystem_limit or snapshot_limit should be
 * enforced. We allow the limit to be exceeded if the user has permission to
 * write the property value. We pass in the creds that we got in the open
 * context since we will always be the GZ root in syncing context. We also have
 * to handle the case where we are allowed to change the limit on the current
 * dataset, but there may be another limit in the tree above.
 *
 * We can never modify these two properties within a non-global zone. In
 * addition, the other checks are modeled on zfs_secpolicy_write_perms. We
 * can't use that function since we are already holding the dp_config_rwlock.
 * In addition, we already have the dd and dealing with snapshots is simplified
 * in this code.
 */

typedef enum {
	ENFORCE_ALWAYS,
	ENFORCE_NEVER,
	ENFORCE_ABOVE
} enforce_res_t;

static enforce_res_t
dsl_enforce_ds_ss_limits(dsl_dir_t *dd, zfs_prop_t prop, cred_t *cr)
{
	enforce_res_t enforce = ENFORCE_ALWAYS;
	uint64_t obj;
	dsl_dataset_t *ds;
	uint64_t zoned;

	ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
	    prop == ZFS_PROP_SNAPSHOT_LIMIT);

#ifdef _KERNEL
	if (crgetzoneid(cr) != GLOBAL_ZONEID)
		return (ENFORCE_ALWAYS);

	if (secpolicy_zfs(cr) == 0)
		return (ENFORCE_NEVER);
#endif

	if ((obj = dsl_dir_phys(dd)->dd_head_dataset_obj) == 0)
		return (ENFORCE_ALWAYS);

	ASSERT(dsl_pool_config_held(dd->dd_pool));

	if (dsl_dataset_hold_obj(dd->dd_pool, obj, FTAG, &ds) != 0)
		return (ENFORCE_ALWAYS);

	if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL) || zoned) {
		/* Only root can access zoned fs's from the GZ */
		enforce = ENFORCE_ALWAYS;
	} else {
		if (dsl_deleg_access_impl(ds, zfs_prop_to_name(prop), cr) == 0)
			enforce = ENFORCE_ABOVE;
	}

	dsl_dataset_rele(ds, FTAG);
	return (enforce);
}

/*
 * Check if adding additional child filesystem(s) would exceed any filesystem
 * limits or adding additional snapshot(s) would exceed any snapshot limits.
 * The prop argument indicates which limit to check.
 *
 * Note that all filesystem limits up to the root (or the highest
 * initialized) filesystem or the given ancestor must be satisfied.
 */
int
dsl_fs_ss_limit_check(dsl_dir_t *dd, uint64_t delta, zfs_prop_t prop,
    dsl_dir_t *ancestor, cred_t *cr)
{
	objset_t *os = dd->dd_pool->dp_meta_objset;
	uint64_t limit, count;
	char *count_prop;
	enforce_res_t enforce;
	int err = 0;

	ASSERT(dsl_pool_config_held(dd->dd_pool));
	ASSERT(prop == ZFS_PROP_FILESYSTEM_LIMIT ||
	    prop == ZFS_PROP_SNAPSHOT_LIMIT);

	/*
	 * If we're allowed to change the limit, don't enforce the limit
	 * e.g. this can happen if a snapshot is taken by an administrative
	 * user in the global zone (i.e. a recursive snapshot by root).
	 * However, we must handle the case of delegated permissions where we
	 * are allowed to change the limit on the current dataset, but there
	 * is another limit in the tree above.
	 */
	enforce = dsl_enforce_ds_ss_limits(dd, prop, cr);
	if (enforce == ENFORCE_NEVER)
		return (0);

	/*
	 * e.g. if renaming a dataset with no snapshots, count adjustment
	 * is 0.
	 */
	if (delta == 0)
		return (0);

	if (prop == ZFS_PROP_SNAPSHOT_LIMIT) {
		/*
		 * We don't enforce the limit for temporary snapshots. This is
		 * indicated by a NULL cred_t argument.
		 */
		if (cr == NULL)
			return (0);

		count_prop = DD_FIELD_SNAPSHOT_COUNT;
	} else {
		count_prop = DD_FIELD_FILESYSTEM_COUNT;
	}

	/*
	 * If an ancestor has been provided, stop checking the limit once we
	 * hit that dir. We need this during rename so that we don't overcount
	 * the check once we recurse up to the common ancestor.
	 */
	if (ancestor == dd)
		return (0);

	/*
	 * If we hit an uninitialized node while recursing up the tree, we can
	 * stop since we know there is no limit here (or above). The counts are
	 * not valid on this node and we know we won't touch this node's counts.
	 */
	if (!dsl_dir_is_zapified(dd) || zap_lookup(os, dd->dd_object,
	    count_prop, sizeof (count), 1, &count) == ENOENT)
		return (0);

	err = dsl_prop_get_dd(dd, zfs_prop_to_name(prop), 8, 1, &limit, NULL,
	    B_FALSE);
	if (err != 0)
		return (err);

	/* Is there a limit which we've hit? */
	if (enforce == ENFORCE_ALWAYS && (count + delta) > limit)
		return (SET_ERROR(EDQUOT));

	if (dd->dd_parent != NULL)
		err = dsl_fs_ss_limit_check(dd->dd_parent, delta, prop,
		    ancestor, cr);

	return (err);
}

/*
 * Adjust the filesystem or snapshot count for the specified dsl_dir_t and all
 * parents. When a new filesystem/snapshot is created, increment the count on
 * all parents, and when a filesystem/snapshot is destroyed, decrement the
 * count.
 */
void
dsl_fs_ss_count_adjust(dsl_dir_t *dd, int64_t delta, const char *prop,
    dmu_tx_t *tx)
{
	int err;
	objset_t *os = dd->dd_pool->dp_meta_objset;
	uint64_t count;

	ASSERT(dsl_pool_config_held(dd->dd_pool));
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0 ||
	    strcmp(prop, DD_FIELD_SNAPSHOT_COUNT) == 0);

	/*
	 * When we receive an incremental stream into a filesystem that already
	 * exists, a temporary clone is created. We don't count this temporary
	 * clone, whose name begins with a '%'. We also ignore hidden ($FREE,
	 * $MOS & $ORIGIN) objsets.
	 */
	if ((dd->dd_myname[0] == '%' || dd->dd_myname[0] == '$') &&
	    strcmp(prop, DD_FIELD_FILESYSTEM_COUNT) == 0)
		return;

	/*
	 * e.g. if renaming a dataset with no snapshots, count adjustment is 0
	 */
	if (delta == 0)
		return;

	/*
	 * If we hit an uninitialized node while recursing up the tree, we can
	 * stop since we know the counts are not valid on this node and we
	 * know we shouldn't touch this node's counts. An uninitialized count
	 * on the node indicates that either the feature has not yet been
	 * activated or there are no limits on this part of the tree.
	 */
	if (!dsl_dir_is_zapified(dd) || (err = zap_lookup(os, dd->dd_object,
	    prop, sizeof (count), 1, &count)) == ENOENT)
		return;
	VERIFY0(err);

	count += delta;
	/* Use a signed verify to make sure we're not neg. */
	VERIFY3S(count, >=, 0);

	VERIFY0(zap_update(os, dd->dd_object, prop, sizeof (count), 1, &count,
	    tx));

	/* Roll up this additional count into our ancestors */
	if (dd->dd_parent != NULL)
		dsl_fs_ss_count_adjust(dd->dd_parent, delta, prop, tx);
}

uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
    dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	uint64_t ddobj;
	dsl_dir_phys_t *ddphys;
	dmu_buf_t *dbuf;

	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
	if (pds) {
		VERIFY(0 == zap_add(mos, dsl_dir_phys(pds)->dd_child_dir_zapobj,
		    name, sizeof (uint64_t), 1, &ddobj, tx));
	} else {
		/* it's the root dir */
		VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
	}
	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	ddphys = dbuf->db_data;

	ddphys->dd_creation_time = gethrestime_sec();
	if (pds) {
		ddphys->dd_parent_obj = pds->dd_object;

		/* update the filesystem counts */
		dsl_fs_ss_count_adjust(pds, 1, DD_FIELD_FILESYSTEM_COUNT, tx);
	}
	ddphys->dd_props_zapobj = zap_create(mos,
	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
	ddphys->dd_child_dir_zapobj = zap_create(mos,
	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;

	dmu_buf_rele(dbuf, FTAG);

	return (ddobj);
}

boolean_t
dsl_dir_is_clone(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_origin_obj &&
	    (dd->dd_pool->dp_origin_snap == NULL ||
	    dsl_dir_phys(dd)->dd_origin_obj !=
	    dd->dd_pool->dp_origin_snap->ds_object));
}


uint64_t
dsl_dir_get_used(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_used_bytes);
}

uint64_t
dsl_dir_get_quota(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_quota);
}

uint64_t
dsl_dir_get_reservation(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_reserved);
}

uint64_t
dsl_dir_get_compressratio(dsl_dir_t *dd)
{
	/* a fixed point number, 100x the ratio */
	return (dsl_dir_phys(dd)->dd_compressed_bytes == 0 ? 100 :
	    (dsl_dir_phys(dd)->dd_uncompressed_bytes * 100 /
	    dsl_dir_phys(dd)->dd_compressed_bytes));
}
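
/*
 * For example, 300MB of uncompressed data stored in 120MB on disk yields
 * (300 * 100) / 120 = 250, i.e. a 2.50x compression ratio.
 */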

uint64_t
dsl_dir_get_logicalused(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_uncompressed_bytes);
}

uint64_t
dsl_dir_get_usedsnap(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_SNAP]);
}

uint64_t
dsl_dir_get_usedds(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_HEAD]);
}

uint64_t
dsl_dir_get_usedrefreserv(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_REFRSRV]);
}

uint64_t
dsl_dir_get_usedchild(dsl_dir_t *dd)
{
	return (dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD] +
	    dsl_dir_phys(dd)->dd_used_breakdown[DD_USED_CHILD_RSRV]);
}

void
dsl_dir_get_origin(dsl_dir_t *dd, char *buf)
{
	dsl_dataset_t *ds;
	VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
	    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds));

	dsl_dataset_name(ds, buf);

	dsl_dataset_rele(ds, FTAG);
}

int
dsl_dir_get_filesystem_count(dsl_dir_t *dd, uint64_t *count)
{
	if (dsl_dir_is_zapified(dd)) {
		objset_t *os = dd->dd_pool->dp_meta_objset;
		return (zap_lookup(os, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
		    sizeof (*count), 1, count));
	} else {
		return (ENOENT);
	}
}

int
dsl_dir_get_snapshot_count(dsl_dir_t *dd, uint64_t *count)
{
	if (dsl_dir_is_zapified(dd)) {
		objset_t *os = dd->dd_pool->dp_meta_objset;
		return (zap_lookup(os, dd->dd_object, DD_FIELD_SNAPSHOT_COUNT,
		    sizeof (*count), 1, count));
	} else {
		return (ENOENT);
	}
}

void
dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
{
	mutex_enter(&dd->dd_lock);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA,
	    dsl_dir_get_quota(dd));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
	    dsl_dir_get_reservation(dd));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED,
	    dsl_dir_get_logicalused(dd));
	if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
		    dsl_dir_get_usedsnap(dd));
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
		    dsl_dir_get_usedds(dd));
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
		    dsl_dir_get_usedrefreserv(dd));
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
		    dsl_dir_get_usedchild(dd));
	}
	mutex_exit(&dd->dd_lock);

	uint64_t count;
	if (dsl_dir_get_filesystem_count(dd, &count) == 0) {
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_FILESYSTEM_COUNT,
		    count);
	}
	if (dsl_dir_get_snapshot_count(dd, &count) == 0) {
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_SNAPSHOT_COUNT,
		    count);
	}

	if (dsl_dir_is_clone(dd)) {
		char buf[ZFS_MAX_DATASET_NAME_LEN];
		dsl_dir_get_origin(dd, buf);
		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
	}

}

void
dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;

	ASSERT(dsl_dir_phys(dd));

	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(dd->dd_dbuf, dd);
	}
}

static int64_t
parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
{
	uint64_t old_accounted = MAX(used, dsl_dir_phys(dd)->dd_reserved);
	uint64_t new_accounted =
	    MAX(used + delta, dsl_dir_phys(dd)->dd_reserved);
	return (new_accounted - old_accounted);
}
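
/*
 * For example, with used = 10G, dd_reserved = 25G and delta = +5G, both the
 * old and new accounted values are 25G, so no change is passed to the parent;
 * once used exceeds the reservation (e.g. used = 30G, delta = +5G) the full
 * 5G is propagated.
 */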

void
dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));

	mutex_enter(&dd->dd_lock);
	ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
	mutex_exit(&dd->dd_lock);

	/* release the hold from dsl_dir_dirty */
	dmu_buf_rele(dd->dd_dbuf, dd);
}

static uint64_t
dsl_dir_space_towrite(dsl_dir_t *dd)
{
	uint64_t space = 0;

	ASSERT(MUTEX_HELD(&dd->dd_lock));

	for (int i = 0; i < TXG_SIZE; i++) {
		space += dd->dd_space_towrite[i & TXG_MASK];
		ASSERT3U(dd->dd_space_towrite[i & TXG_MASK], >=, 0);
	}
	return (space);
}

/*
 * How much space would dd have available if ancestor had delta applied
 * to it? If ondiskonly is set, we're only interested in what's
 * on-disk, not estimated pending changes.
 */
uint64_t
dsl_dir_space_available(dsl_dir_t *dd,
    dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
{
	uint64_t parentspace, myspace, quota, used;

	/*
	 * If there are no restrictions otherwise, assume we have
	 * unlimited space available.
	 */
	quota = UINT64_MAX;
	parentspace = UINT64_MAX;

	if (dd->dd_parent != NULL) {
		parentspace = dsl_dir_space_available(dd->dd_parent,
		    ancestor, delta, ondiskonly);
	}

	mutex_enter(&dd->dd_lock);
	if (dsl_dir_phys(dd)->dd_quota != 0)
		quota = dsl_dir_phys(dd)->dd_quota;
	used = dsl_dir_phys(dd)->dd_used_bytes;
	if (!ondiskonly)
		used += dsl_dir_space_towrite(dd);

	if (dd->dd_parent == NULL) {
		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
		quota = MIN(quota, poolsize);
	}

	if (dsl_dir_phys(dd)->dd_reserved > used && parentspace != UINT64_MAX) {
		/*
		 * We have some space reserved, in addition to what our
		 * parent gave us.
		 */
		parentspace += dsl_dir_phys(dd)->dd_reserved - used;
	}

	if (dd == ancestor) {
		ASSERT(delta <= 0);
		ASSERT(used >= -delta);
		used += delta;
		if (parentspace != UINT64_MAX)
			parentspace -= delta;
	}

	if (used > quota) {
		/* over quota */
		myspace = 0;
	} else {
		/*
		 * the lesser of the space provided by our parent and
		 * the space left in our quota
		 */
		myspace = MIN(parentspace, quota - used);
	}

	mutex_exit(&dd->dd_lock);

	return (myspace);
}
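
/*
 * For example, a dir with a 100G quota and 70G used (including pending
 * writes unless ondiskonly is set) reports the lesser of its remaining 30G
 * and whatever space its parent can still provide.
 */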

struct tempreserve {
	list_node_t tr_node;
	dsl_dir_t *tr_ds;
	uint64_t tr_size;
};

static int
dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
    boolean_t ignorequota, list_t *tr_list,
    dmu_tx_t *tx, boolean_t first)
{
	uint64_t txg;
	uint64_t quota;
	struct tempreserve *tr;
	int retval;
	uint64_t ref_rsrv;

top_of_function:
	txg = tx->tx_txg;
	retval = EDQUOT;
	ref_rsrv = 0;

	ASSERT3U(txg, !=, 0);
	ASSERT3S(asize, >, 0);

	mutex_enter(&dd->dd_lock);

	/*
	 * Check against the dsl_dir's quota. We don't add in the delta
	 * when checking for over-quota because they get one free hit.
	 */
	uint64_t est_inflight = dsl_dir_space_towrite(dd);
	for (int i = 0; i < TXG_SIZE; i++)
		est_inflight += dd->dd_tempreserved[i];
	uint64_t used_on_disk = dsl_dir_phys(dd)->dd_used_bytes;

	/*
	 * On the first iteration, fetch the dataset's used-on-disk and
	 * refreservation values. Also, if checkrefquota is set, test if
	 * allocating this space would exceed the dataset's refquota.
	 */
	if (first && tx->tx_objset) {
		int error;
		dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;

		error = dsl_dataset_check_quota(ds, !netfree,
		    asize, est_inflight, &used_on_disk, &ref_rsrv);
		if (error != 0) {
			mutex_exit(&dd->dd_lock);
			DMU_TX_STAT_BUMP(dmu_tx_quota);
			return (error);
		}
	}

	/*
	 * If this transaction will result in a net free of space,
	 * we want to let it through.
	 */
	if (ignorequota || netfree || dsl_dir_phys(dd)->dd_quota == 0)
		quota = UINT64_MAX;
	else
		quota = dsl_dir_phys(dd)->dd_quota;

	/*
	 * Adjust the quota against the actual pool size at the root
	 * minus any outstanding deferred frees.
	 * To ensure that it's possible to remove files from a full
	 * pool without inducing transient overcommits, we throttle
	 * netfree transactions against a quota that is slightly larger,
	 * but still within the pool's allocation slop. In cases where
	 * we're very close to full, this will allow a steady trickle of
	 * removes to get through.
	 */
	uint64_t deferred = 0;
	if (dd->dd_parent == NULL) {
		spa_t *spa = dd->dd_pool->dp_spa;
		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
		deferred = metaslab_class_get_deferred(spa_normal_class(spa));
		if (poolsize - deferred < quota) {
			quota = poolsize - deferred;
			retval = ENOSPC;
		}
	}

	/*
	 * If they are requesting more space, and our current estimate
	 * is over quota, they get to try again unless the actual
	 * on-disk is over quota and there are no pending changes (which
	 * may free up space for us).
	 */
	if (used_on_disk + est_inflight >= quota) {
		if (est_inflight > 0 || used_on_disk < quota ||
		    (retval == ENOSPC && used_on_disk < quota + deferred))
			retval = ERESTART;
		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
		    "quota=%lluK tr=%lluK err=%d\n",
		    used_on_disk>>10, est_inflight>>10,
		    quota>>10, asize>>10, retval);
		mutex_exit(&dd->dd_lock);
		DMU_TX_STAT_BUMP(dmu_tx_quota);
		return (SET_ERROR(retval));
	}

	/* We need to up our estimated delta before dropping dd_lock */
	dd->dd_tempreserved[txg & TXG_MASK] += asize;

	uint64_t parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
	    asize - ref_rsrv);
	mutex_exit(&dd->dd_lock);

	tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
	tr->tr_ds = dd;
	tr->tr_size = asize;
	list_insert_tail(tr_list, tr);

	/* see if it's OK with our parent */
	if (dd->dd_parent != NULL && parent_rsrv != 0) {
		/*
		 * Recurse on our parent without recursion. This has been
		 * observed to be potentially large stack usage even within
		 * the test suite. Largest seen stack was 7632 bytes on linux.
		 */

		dd = dd->dd_parent;
		asize = parent_rsrv;
		ignorequota = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
		first = B_FALSE;
		goto top_of_function;

	} else {
		return (0);
	}
}

/*
 * Reserve space in this dsl_dir, to be used in this tx's txg.
 * After the space has been dirtied (and dsl_dir_willuse_space()
 * has been called), the reservation should be canceled, using
 * dsl_dir_tempreserve_clear().
 */
int
dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
    boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx)
{
	int err;
	list_t *tr_list;

	if (asize == 0) {
		*tr_cookiep = NULL;
		return (0);
	}

	tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
	list_create(tr_list, sizeof (struct tempreserve),
	    offsetof(struct tempreserve, tr_node));
	ASSERT3S(asize, >, 0);

	err = arc_tempreserve_space(lsize, tx->tx_txg);
	if (err == 0) {
		struct tempreserve *tr;

		tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
		tr->tr_size = lsize;
		list_insert_tail(tr_list, tr);
	} else {
		if (err == EAGAIN) {
			/*
			 * If arc_memory_throttle() detected that pageout
			 * is running and we are low on memory, we delay new
			 * non-pageout transactions to give pageout an
			 * advantage.
			 *
			 * It is unfortunate to be delaying while the caller's
			 * locks are held.
			 */
			txg_delay(dd->dd_pool, tx->tx_txg,
			    MSEC2NSEC(10), MSEC2NSEC(10));
			err = SET_ERROR(ERESTART);
		}
	}

	if (err == 0) {
		err = dsl_dir_tempreserve_impl(dd, asize, netfree,
		    B_FALSE, tr_list, tx, B_TRUE);
	}

	if (err != 0)
		dsl_dir_tempreserve_clear(tr_list, tx);
	else
		*tr_cookiep = tr_list;

	return (err);
}

/*
 * Clear a temporary reservation that we previously made with
 * dsl_dir_tempreserve_space().
 */
void
dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
{
	int txgidx = tx->tx_txg & TXG_MASK;
	list_t *tr_list = tr_cookie;
	struct tempreserve *tr;

	ASSERT3U(tx->tx_txg, !=, 0);

	if (tr_cookie == NULL)
		return;

	while ((tr = list_head(tr_list)) != NULL) {
		if (tr->tr_ds) {
			mutex_enter(&tr->tr_ds->dd_lock);
			ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
			    tr->tr_size);
			tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
			mutex_exit(&tr->tr_ds->dd_lock);
		} else {
			arc_tempreserve_clear(tr->tr_size);
		}
		list_remove(tr_list, tr);
		kmem_free(tr, sizeof (struct tempreserve));
	}

	kmem_free(tr_list, sizeof (list_t));
}

/*
 * This should be called from open context when we think we're going to write
 * or free space, for example when dirtying data. Be conservative; it's okay
 * to write less space or free more, but we don't want to write more or free
 * less than the amount specified.
 *
 * NOTE: The behavior of this function is identical to the Illumos / FreeBSD
 * version, however it has been adjusted to use an iterative rather than
 * recursive algorithm to minimize stack usage.
 */
void
dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
{
	int64_t parent_space;
	uint64_t est_used;

	do {
		mutex_enter(&dd->dd_lock);
		if (space > 0)
			dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;

		est_used = dsl_dir_space_towrite(dd) +
		    dsl_dir_phys(dd)->dd_used_bytes;
		parent_space = parent_delta(dd, est_used, space);
		mutex_exit(&dd->dd_lock);

		/* Make sure that we clean up dd_space_to* */
		dsl_dir_dirty(dd, tx);

		dd = dd->dd_parent;
		space = parent_space;
	} while (space && dd);
}

/* call from syncing context when we actually write/free space for this dd */
void
dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
    int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
{
	int64_t accounted_delta;

	/*
	 * dsl_dataset_set_refreservation_sync_impl() calls this with
	 * dd_lock held, so that it can atomically update
	 * ds->ds_reserved and the dsl_dir accounting, so that
	 * dsl_dataset_check_quota() can see dataset and dir accounting
	 * consistently.
	 */
	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(type < DD_USED_NUM);

	dmu_buf_will_dirty(dd->dd_dbuf, tx);

	if (needlock)
		mutex_enter(&dd->dd_lock);
	accounted_delta =
	    parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, used);
	ASSERT(used >= 0 || dsl_dir_phys(dd)->dd_used_bytes >= -used);
	ASSERT(compressed >= 0 ||
	    dsl_dir_phys(dd)->dd_compressed_bytes >= -compressed);
	ASSERT(uncompressed >= 0 ||
	    dsl_dir_phys(dd)->dd_uncompressed_bytes >= -uncompressed);
	dsl_dir_phys(dd)->dd_used_bytes += used;
	dsl_dir_phys(dd)->dd_uncompressed_bytes += uncompressed;
	dsl_dir_phys(dd)->dd_compressed_bytes += compressed;

	if (dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		ASSERT(used > 0 ||
		    dsl_dir_phys(dd)->dd_used_breakdown[type] >= -used);
		dsl_dir_phys(dd)->dd_used_breakdown[type] += used;
#ifdef DEBUG
		{
			dd_used_t t;
			uint64_t u = 0;
			for (t = 0; t < DD_USED_NUM; t++)
				u += dsl_dir_phys(dd)->dd_used_breakdown[t];
			ASSERT3U(u, ==, dsl_dir_phys(dd)->dd_used_bytes);
		}
#endif
	}
	if (needlock)
		mutex_exit(&dd->dd_lock);

	if (dd->dd_parent != NULL) {
		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
		    accounted_delta, compressed, uncompressed, tx);
		dsl_dir_transfer_space(dd->dd_parent,
		    used - accounted_delta,
		    DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
	}
}

void
dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(oldtype < DD_USED_NUM);
	ASSERT(newtype < DD_USED_NUM);

	if (delta == 0 ||
	    !(dsl_dir_phys(dd)->dd_flags & DD_FLAG_USED_BREAKDOWN))
		return;

	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	mutex_enter(&dd->dd_lock);
	ASSERT(delta > 0 ?
	    dsl_dir_phys(dd)->dd_used_breakdown[oldtype] >= delta :
	    dsl_dir_phys(dd)->dd_used_breakdown[newtype] >= -delta);
	ASSERT(dsl_dir_phys(dd)->dd_used_bytes >= ABS(delta));
	dsl_dir_phys(dd)->dd_used_breakdown[oldtype] -= delta;
	dsl_dir_phys(dd)->dd_used_breakdown[newtype] += delta;
	mutex_exit(&dd->dd_lock);
}
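
/*
 * For example, when dsl_dir_diduse_space() charges a child's growth that was
 * already covered by the child's reservation, the parent's breakdown moves
 * those bytes from DD_USED_CHILD_RSRV to DD_USED_CHILD here while its total
 * dd_used_bytes is left unchanged.
 */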
1547
13fe0198
MA
1548typedef struct dsl_dir_set_qr_arg {
1549 const char *ddsqra_name;
1550 zprop_source_t ddsqra_source;
1551 uint64_t ddsqra_value;
1552} dsl_dir_set_qr_arg_t;
1553
34dc7c2f 1554static int
13fe0198 1555dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
34dc7c2f 1556{
13fe0198
MA
1557 dsl_dir_set_qr_arg_t *ddsqra = arg;
1558 dsl_pool_t *dp = dmu_tx_pool(tx);
1559 dsl_dataset_t *ds;
1560 int error;
1561 uint64_t towrite, newval;
34dc7c2f 1562
13fe0198
MA
1563 error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
1564 if (error != 0)
1565 return (error);
1566
1567 error = dsl_prop_predict(ds->ds_dir, "quota",
1568 ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
1569 if (error != 0) {
1570 dsl_dataset_rele(ds, FTAG);
1571 return (error);
1572 }
428870ff 1573
13fe0198
MA
1574 if (newval == 0) {
1575 dsl_dataset_rele(ds, FTAG);
34dc7c2f 1576 return (0);
13fe0198 1577 }
34dc7c2f 1578
13fe0198 1579 mutex_enter(&ds->ds_dir->dd_lock);
34dc7c2f
BB
1580 /*
1581 * If we are doing the preliminary check in open context, and
1582 * there are pending changes, then don't fail it, since the
1583 * pending changes could under-estimate the amount of space to be
1584 * freed up.
1585 */
13fe0198 1586 towrite = dsl_dir_space_towrite(ds->ds_dir);
34dc7c2f 1587 if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
d683ddbb
JG
1588 (newval < dsl_dir_phys(ds->ds_dir)->dd_reserved ||
1589 newval < dsl_dir_phys(ds->ds_dir)->dd_used_bytes + towrite)) {
2e528b49 1590 error = SET_ERROR(ENOSPC);
34dc7c2f 1591 }
13fe0198
MA
1592 mutex_exit(&ds->ds_dir->dd_lock);
1593 dsl_dataset_rele(ds, FTAG);
1594 return (error);
34dc7c2f
BB
1595}
1596
34dc7c2f 1597static void
13fe0198 1598dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
34dc7c2f 1599{
13fe0198
MA
1600 dsl_dir_set_qr_arg_t *ddsqra = arg;
1601 dsl_pool_t *dp = dmu_tx_pool(tx);
1602 dsl_dataset_t *ds;
1603 uint64_t newval;
428870ff 1604
13fe0198 1605 VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
34dc7c2f 1606
b1118acb
MM
1607 if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) {
1608 dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
1609 ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
1610 &ddsqra->ddsqra_value, tx);
34dc7c2f 1611
b1118acb
MM
1612 VERIFY0(dsl_prop_get_int_ds(ds,
1613 zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
1614 } else {
1615 newval = ddsqra->ddsqra_value;
1616 spa_history_log_internal_ds(ds, "set", tx, "%s=%lld",
1617 zfs_prop_to_name(ZFS_PROP_QUOTA), (longlong_t)newval);
1618 }
6f1ffb06 1619
13fe0198
MA
1620 dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
1621 mutex_enter(&ds->ds_dir->dd_lock);
d683ddbb 1622 dsl_dir_phys(ds->ds_dir)->dd_quota = newval;
13fe0198
MA
1623 mutex_exit(&ds->ds_dir->dd_lock);
1624 dsl_dataset_rele(ds, FTAG);
34dc7c2f
BB
1625}
1626
1627int
428870ff 1628dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
34dc7c2f 1629{
13fe0198 1630 dsl_dir_set_qr_arg_t ddsqra;
428870ff 1631
13fe0198
MA
1632 ddsqra.ddsqra_name = ddname;
1633 ddsqra.ddsqra_source = source;
1634 ddsqra.ddsqra_value = quota;
428870ff 1635
13fe0198 1636 return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
3d45fdd6 1637 dsl_dir_set_quota_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
34dc7c2f
BB
1638}

int
dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
{
	dsl_dir_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	uint64_t newval, used, avail;
	int error;

	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
	if (error != 0)
		return (error);
	dd = ds->ds_dir;

	/*
	 * If we are doing the preliminary check in open context, the
	 * space estimates may be inaccurate.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		dsl_dataset_rele(ds, FTAG);
		return (0);
	}

	error = dsl_prop_predict(ds->ds_dir,
	    zfs_prop_to_name(ZFS_PROP_RESERVATION),
	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	mutex_enter(&dd->dd_lock);
	used = dsl_dir_phys(dd)->dd_used_bytes;
	mutex_exit(&dd->dd_lock);

	if (dd->dd_parent) {
		avail = dsl_dir_space_available(dd->dd_parent,
		    NULL, 0, FALSE);
	} else {
		avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
	}

	if (MAX(used, newval) > MAX(used, dsl_dir_phys(dd)->dd_reserved)) {
		uint64_t delta = MAX(used, newval) -
		    MAX(used, dsl_dir_phys(dd)->dd_reserved);

		if (delta > avail ||
		    (dsl_dir_phys(dd)->dd_quota > 0 &&
		    newval > dsl_dir_phys(dd)->dd_quota))
			error = SET_ERROR(ENOSPC);
	}

	dsl_dataset_rele(ds, FTAG);
	return (error);
}

void
dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
{
	uint64_t used;
	int64_t delta;

	dmu_buf_will_dirty(dd->dd_dbuf, tx);

	mutex_enter(&dd->dd_lock);
	used = dsl_dir_phys(dd)->dd_used_bytes;
	delta = MAX(used, value) - MAX(used, dsl_dir_phys(dd)->dd_reserved);
	dsl_dir_phys(dd)->dd_reserved = value;

	if (dd->dd_parent != NULL) {
		/* Roll up this additional usage into our ancestors */
		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
		    delta, 0, 0, tx);
	}
	mutex_exit(&dd->dd_lock);
}
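
/*
 * Worked example (assumed numbers): with dd_used_bytes = 1G and
 * dd_reserved = 0, raising the reservation to 5G gives
 * delta = MAX(1G, 5G) - MAX(1G, 0) = 4G, which is charged to the parent
 * as DD_USED_CHILD_RSRV and rolled up from there by
 * dsl_dir_diduse_space().  Dropping the reservation back to 0 would
 * produce delta = -4G and release the same space.
 */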

static void
dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dir_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	uint64_t newval;

	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));

	if (spa_version(dp->dp_spa) >= SPA_VERSION_RECVD_PROPS) {
		dsl_prop_set_sync_impl(ds,
		    zfs_prop_to_name(ZFS_PROP_RESERVATION),
		    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
		    &ddsqra->ddsqra_value, tx);

		VERIFY0(dsl_prop_get_int_ds(ds,
		    zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
	} else {
		newval = ddsqra->ddsqra_value;
		spa_history_log_internal_ds(ds, "set", tx, "%s=%lld",
		    zfs_prop_to_name(ZFS_PROP_RESERVATION),
		    (longlong_t)newval);
	}

	dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
	dsl_dataset_rele(ds, FTAG);
}

int
dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
    uint64_t reservation)
{
	dsl_dir_set_qr_arg_t ddsqra;

	ddsqra.ddsqra_name = ddname;
	ddsqra.ddsqra_source = source;
	ddsqra.ddsqra_value = reservation;

	return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
	    dsl_dir_set_reservation_sync, &ddsqra, 0, ZFS_SPACE_CHECK_NONE));
}

static dsl_dir_t *
closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
{
	for (; ds1; ds1 = ds1->dd_parent) {
		dsl_dir_t *dd;
		for (dd = ds2; dd; dd = dd->dd_parent) {
			if (ds1 == dd)
				return (dd);
		}
	}
	return (NULL);
}

/*
 * If delta is applied to dd, how much of that delta would be applied to
 * ancestor? Syncing context only.
 */
static int64_t
would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
{
	if (dd == ancestor)
		return (delta);

	mutex_enter(&dd->dd_lock);
	delta = parent_delta(dd, dsl_dir_phys(dd)->dd_used_bytes, delta);
	mutex_exit(&dd->dd_lock);
	return (would_change(dd->dd_parent, delta, ancestor));
}
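
/*
 * Worked example (assumed numbers): freeing 1G from a dir with
 * dd_used_bytes = 3G and dd_reserved = 2.5G only changes what the parent
 * accounts for by MAX(2G, 2.5G) - MAX(3G, 2.5G) = -0.5G, because the
 * reservation still holds the rest.  parent_delta() applies that
 * clamping at each level as the recursion walks up to "ancestor".
 */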

typedef struct dsl_dir_rename_arg {
	const char *ddra_oldname;
	const char *ddra_newname;
	cred_t *ddra_cred;
} dsl_dir_rename_arg_t;

/* ARGSUSED */
static int
dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
	int *deltap = arg;
	char namebuf[ZFS_MAX_DATASET_NAME_LEN];

	dsl_dataset_name(ds, namebuf);

	if (strlen(namebuf) + *deltap >= ZFS_MAX_DATASET_NAME_LEN)
		return (SET_ERROR(ENAMETOOLONG));
	return (0);
}

static int
dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
{
	dsl_dir_rename_arg_t *ddra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *dd, *newparent;
	const char *mynewname;
	int error;
	int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);

	/* target dir should exist */
	error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
	if (error != 0)
		return (error);

	/* new parent should exist */
	error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
	    &newparent, &mynewname);
	if (error != 0) {
		dsl_dir_rele(dd, FTAG);
		return (error);
	}

	/* can't rename to different pool */
	if (dd->dd_pool != newparent->dd_pool) {
		dsl_dir_rele(newparent, FTAG);
		dsl_dir_rele(dd, FTAG);
		return (SET_ERROR(EXDEV));
	}

	/* new name should not already exist */
	if (mynewname == NULL) {
		dsl_dir_rele(newparent, FTAG);
		dsl_dir_rele(dd, FTAG);
		return (SET_ERROR(EEXIST));
	}

	/* if the name length is growing, validate child name lengths */
	if (delta > 0) {
		error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
		    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
		if (error != 0) {
			dsl_dir_rele(newparent, FTAG);
			dsl_dir_rele(dd, FTAG);
			return (error);
		}
	}

	if (dmu_tx_is_syncing(tx)) {
		if (spa_feature_is_active(dp->dp_spa,
		    SPA_FEATURE_FS_SS_LIMIT)) {
			/*
			 * Although this is the check function and we don't
			 * normally make on-disk changes in check functions,
			 * we need to do that here.
			 *
			 * Ensure this portion of the tree's counts have been
			 * initialized in case the new parent has limits set.
			 */
			dsl_dir_init_fs_ss_count(dd, tx);
		}
	}

	if (newparent != dd->dd_parent) {
		/* is there enough space? */
		uint64_t myspace =
		    MAX(dsl_dir_phys(dd)->dd_used_bytes,
		    dsl_dir_phys(dd)->dd_reserved);
		objset_t *os = dd->dd_pool->dp_meta_objset;
		uint64_t fs_cnt = 0;
		uint64_t ss_cnt = 0;

		if (dsl_dir_is_zapified(dd)) {
			int err;

			err = zap_lookup(os, dd->dd_object,
			    DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1,
			    &fs_cnt);
			if (err != ENOENT && err != 0) {
				dsl_dir_rele(newparent, FTAG);
				dsl_dir_rele(dd, FTAG);
				return (err);
			}

			/*
			 * have to add 1 for the filesystem itself that we're
			 * moving
			 */
			fs_cnt++;

			err = zap_lookup(os, dd->dd_object,
			    DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1,
			    &ss_cnt);
			if (err != ENOENT && err != 0) {
				dsl_dir_rele(newparent, FTAG);
				dsl_dir_rele(dd, FTAG);
				return (err);
			}
		}

		/* check for encryption errors */
		error = dsl_dir_rename_crypt_check(dd, newparent);
		if (error != 0) {
			dsl_dir_rele(newparent, FTAG);
			dsl_dir_rele(dd, FTAG);
			return (SET_ERROR(EACCES));
		}

		/* no rename into our descendant */
		if (closest_common_ancestor(dd, newparent) == dd) {
			dsl_dir_rele(newparent, FTAG);
			dsl_dir_rele(dd, FTAG);
			return (SET_ERROR(EINVAL));
		}

		error = dsl_dir_transfer_possible(dd->dd_parent,
		    newparent, fs_cnt, ss_cnt, myspace, ddra->ddra_cred);
		if (error != 0) {
			dsl_dir_rele(newparent, FTAG);
			dsl_dir_rele(dd, FTAG);
			return (error);
		}
	}

	dsl_dir_rele(newparent, FTAG);
	dsl_dir_rele(dd, FTAG);
	return (0);
}

static void
dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dir_rename_arg_t *ddra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *dd, *newparent;
	const char *mynewname;
	int error;
	objset_t *mos = dp->dp_meta_objset;

	VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
	VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
	    &mynewname));

	/* Log this before we change the name. */
	spa_history_log_internal_dd(dd, "rename", tx,
	    "-> %s", ddra->ddra_newname);

	if (newparent != dd->dd_parent) {
		objset_t *os = dd->dd_pool->dp_meta_objset;
		uint64_t fs_cnt = 0;
		uint64_t ss_cnt = 0;

		/*
		 * We already made sure the dd counts were initialized in the
		 * check function.
		 */
		if (spa_feature_is_active(dp->dp_spa,
		    SPA_FEATURE_FS_SS_LIMIT)) {
			VERIFY0(zap_lookup(os, dd->dd_object,
			    DD_FIELD_FILESYSTEM_COUNT, sizeof (fs_cnt), 1,
			    &fs_cnt));
			/* add 1 for the filesystem itself that we're moving */
			fs_cnt++;

			VERIFY0(zap_lookup(os, dd->dd_object,
			    DD_FIELD_SNAPSHOT_COUNT, sizeof (ss_cnt), 1,
			    &ss_cnt));
		}

		dsl_fs_ss_count_adjust(dd->dd_parent, -fs_cnt,
		    DD_FIELD_FILESYSTEM_COUNT, tx);
		dsl_fs_ss_count_adjust(newparent, fs_cnt,
		    DD_FIELD_FILESYSTEM_COUNT, tx);

		dsl_fs_ss_count_adjust(dd->dd_parent, -ss_cnt,
		    DD_FIELD_SNAPSHOT_COUNT, tx);
		dsl_fs_ss_count_adjust(newparent, ss_cnt,
		    DD_FIELD_SNAPSHOT_COUNT, tx);

		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
		    -dsl_dir_phys(dd)->dd_used_bytes,
		    -dsl_dir_phys(dd)->dd_compressed_bytes,
		    -dsl_dir_phys(dd)->dd_uncompressed_bytes, tx);
		dsl_dir_diduse_space(newparent, DD_USED_CHILD,
		    dsl_dir_phys(dd)->dd_used_bytes,
		    dsl_dir_phys(dd)->dd_compressed_bytes,
		    dsl_dir_phys(dd)->dd_uncompressed_bytes, tx);

		if (dsl_dir_phys(dd)->dd_reserved >
		    dsl_dir_phys(dd)->dd_used_bytes) {
			uint64_t unused_rsrv = dsl_dir_phys(dd)->dd_reserved -
			    dsl_dir_phys(dd)->dd_used_bytes;

			dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
			    -unused_rsrv, 0, 0, tx);
			dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
			    unused_rsrv, 0, 0, tx);
		}
	}

	dmu_buf_will_dirty(dd->dd_dbuf, tx);

	/* remove from old parent zapobj */
	error = zap_remove(mos,
	    dsl_dir_phys(dd->dd_parent)->dd_child_dir_zapobj,
	    dd->dd_myname, tx);
	ASSERT0(error);

	(void) strlcpy(dd->dd_myname, mynewname,
	    sizeof (dd->dd_myname));
	dsl_dir_rele(dd->dd_parent, dd);
	dsl_dir_phys(dd)->dd_parent_obj = newparent->dd_object;
	VERIFY0(dsl_dir_hold_obj(dp,
	    newparent->dd_object, NULL, dd, &dd->dd_parent));

	/* add to new parent zapobj */
	VERIFY0(zap_add(mos, dsl_dir_phys(newparent)->dd_child_dir_zapobj,
	    dd->dd_myname, 8, 1, &dd->dd_object, tx));

	zvol_rename_minors(dp->dp_spa, ddra->ddra_oldname,
	    ddra->ddra_newname, B_TRUE);

	dsl_prop_notify_all(dd);

	dsl_dir_rele(newparent, FTAG);
	dsl_dir_rele(dd, FTAG);
}

int
dsl_dir_rename(const char *oldname, const char *newname)
{
	dsl_dir_rename_arg_t ddra;

	ddra.ddra_oldname = oldname;
	ddra.ddra_newname = newname;
	ddra.ddra_cred = CRED();

	return (dsl_sync_task(oldname,
	    dsl_dir_rename_check, dsl_dir_rename_sync, &ddra,
	    3, ZFS_SPACE_CHECK_RESERVED));
}
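
/*
 * Illustrative usage (hypothetical names): both arguments are full
 * dataset paths, so a simple rename and a move to a new parent use the
 * same entry point:
 *
 *	error = dsl_dir_rename("tank/home/old", "tank/home/new");
 *	error = dsl_dir_rename("tank/home/proj", "tank/archive/proj");
 *
 * Only the second form changes parents, so only it triggers the space,
 * filesystem/snapshot limit, and encryption checks done under
 * "newparent != dd->dd_parent" in dsl_dir_rename_check().
 */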

int
dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
    uint64_t fs_cnt, uint64_t ss_cnt, uint64_t space, cred_t *cr)
{
	dsl_dir_t *ancestor;
	int64_t adelta;
	uint64_t avail;
	int err;

	ancestor = closest_common_ancestor(sdd, tdd);
	adelta = would_change(sdd, -space, ancestor);
	avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
	if (avail < space)
		return (SET_ERROR(ENOSPC));

	err = dsl_fs_ss_limit_check(tdd, fs_cnt, ZFS_PROP_FILESYSTEM_LIMIT,
	    ancestor, cr);
	if (err != 0)
		return (err);
	err = dsl_fs_ss_limit_check(tdd, ss_cnt, ZFS_PROP_SNAPSHOT_LIMIT,
	    ancestor, cr);
	if (err != 0)
		return (err);

	return (0);
}
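
/*
 * Sketch of the calculation (illustrative): "space" is the charge being
 * moved (for a rename, MAX(dd_used_bytes, dd_reserved) of the subtree).
 * would_change(sdd, -space, ancestor) estimates how much of that charge
 * actually disappears from the source side by the time it reaches the
 * common ancestor (reservations can absorb part of it), and that adelta
 * is credited when asking dsl_dir_space_available() whether the
 * destination can hold "space".  The two dsl_fs_ss_limit_check() calls
 * then enforce filesystem_limit and snapshot_limit on the destination
 * side up to the common ancestor.
 */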

timestruc_t
dsl_dir_snap_cmtime(dsl_dir_t *dd)
{
	timestruc_t t;

	mutex_enter(&dd->dd_lock);
	t = dd->dd_snap_cmtime;
	mutex_exit(&dd->dd_lock);

	return (t);
}

void
dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
{
	timestruc_t t;

	gethrestime(&t);
	mutex_enter(&dd->dd_lock);
	dd->dd_snap_cmtime = t;
	mutex_exit(&dd->dd_lock);
}

void
dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx)
{
	objset_t *mos = dd->dd_pool->dp_meta_objset;
	dmu_object_zapify(mos, dd->dd_object, DMU_OT_DSL_DIR, tx);
}

boolean_t
dsl_dir_is_zapified(dsl_dir_t *dd)
{
	dmu_object_info_t doi;

	dmu_object_info_from_db(dd->dd_dbuf, &doi);
	return (doi.doi_type == DMU_OTN_ZAP_METADATA);
}
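
/*
 * Sketch of the usual pattern for storing an extensible property on a
 * dsl_dir (the count value below is hypothetical; the names are the
 * ones used elsewhere in this file):
 *
 *	objset_t *mos = dd->dd_pool->dp_meta_objset;
 *	uint64_t count = 1;
 *
 *	if (!dsl_dir_is_zapified(dd))
 *		dsl_dir_zapify(dd, tx);
 *	VERIFY0(zap_update(mos, dd->dd_object, DD_FIELD_FILESYSTEM_COUNT,
 *	    sizeof (count), 1, &count, tx));
 */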

#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation);
#endif