]> git.proxmox.com Git - mirror_zfs.git/blob - module/zfs/dsl_dir.c
Illumos #3598
[mirror_zfs.git] / module / zfs / dsl_dir.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 */
25
26 #include <sys/dmu.h>
27 #include <sys/dmu_objset.h>
28 #include <sys/dmu_tx.h>
29 #include <sys/dsl_dataset.h>
30 #include <sys/dsl_dir.h>
31 #include <sys/dsl_prop.h>
32 #include <sys/dsl_synctask.h>
33 #include <sys/dsl_deleg.h>
34 #include <sys/spa.h>
35 #include <sys/metaslab.h>
36 #include <sys/zap.h>
37 #include <sys/zio.h>
38 #include <sys/arc.h>
39 #include <sys/sunddi.h>
40 #include "zfs_namecheck.h"
41
42 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
43
/*
 * dmu_buf user-eviction callback for a dsl_dir_t, registered via
 * dmu_buf_set_user_ie() in dsl_dir_hold_obj().  Tears down the in-core
 * directory once its bonus buffer is evicted; by this point the dir
 * must not be dirty or tempreserved in any open txg.
 */
/* ARGSUSED */
static void
dsl_dir_evict(dmu_buf_t *db, void *arg)
{
	dsl_dir_t *dd = arg;
	/* dp is only referenced by the ASSERT below; compiled out otherwise */
	ASSERTV(dsl_pool_t *dp = dd->dd_pool;)
	int t;

	for (t = 0; t < TXG_SIZE; t++) {
		ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
		ASSERT(dd->dd_tempreserved[t] == 0);
		ASSERT(dd->dd_space_towrite[t] == 0);
	}

	if (dd->dd_parent)
		dsl_dir_rele(dd->dd_parent, dd);

	/* drop the instantiate-to-evict hold taken in dsl_dir_hold_obj() */
	spa_close(dd->dd_pool->dp_spa, dd);

	/*
	 * The props callback list should have been cleaned up by
	 * objset_evict().
	 */
	list_destroy(&dd->dd_prop_cbs);
	mutex_destroy(&dd->dd_lock);
	kmem_free(dd, sizeof (dsl_dir_t));
}
71
/*
 * Hold the dsl_dir with object number ddobj in pool dp, instantiating
 * the in-core dsl_dir_t from its MOS bonus buffer if no other thread
 * has done so yet.  tail, if non-NULL, is the dir's name under its
 * parent (otherwise the name is found by searching the parent's
 * child-dir ZAP).  On success, *ddp is set and the caller must release
 * the hold with dsl_dir_rele(*ddp, tag).
 */
int
dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
    const char *tail, void *tag, dsl_dir_t **ddp)
{
	dmu_buf_t *dbuf;
	dsl_dir_t *dd;
	int err;

	ASSERT(dsl_pool_config_held(dp));

	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
	if (err != 0)
		return (err);
	dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
	{
		dmu_object_info_t doi;
		dmu_object_info_from_db(dbuf, &doi);
		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
	}
#endif
	if (dd == NULL) {
		dsl_dir_t *winner;

		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_PUSHPAGE);
		dd->dd_object = ddobj;
		dd->dd_dbuf = dbuf;
		dd->dd_pool = dp;
		dd->dd_phys = dbuf->db_data;
		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);

		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
		    offsetof(dsl_prop_cb_record_t, cbr_node));

		dsl_dir_snap_cmtime_update(dd);

		if (dd->dd_phys->dd_parent_obj) {
			/* hold the parent first; released on error or evict */
			err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
			    NULL, dd, &dd->dd_parent);
			if (err != 0)
				goto errout;
			if (tail) {
#ifdef ZFS_DEBUG
				uint64_t foundobj;

				err = zap_lookup(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    tail, sizeof (foundobj), 1, &foundobj);
				ASSERT(err || foundobj == ddobj);
#endif
				(void) strcpy(dd->dd_myname, tail);
			} else {
				/* no name given: reverse-lookup it by ddobj */
				err = zap_value_search(dp->dp_meta_objset,
				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
				    ddobj, 0, dd->dd_myname);
			}
			if (err != 0)
				goto errout;
		} else {
			/* the root dir is named after the pool itself */
			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
		}

		if (dsl_dir_is_clone(dd)) {
			dmu_buf_t *origin_bonus;
			dsl_dataset_phys_t *origin_phys;

			/*
			 * We can't open the origin dataset, because
			 * that would require opening this dsl_dir.
			 * Just look at its phys directly instead.
			 */
			err = dmu_bonus_hold(dp->dp_meta_objset,
			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
			if (err != 0)
				goto errout;
			origin_phys = origin_bonus->db_data;
			dd->dd_origin_txg =
			    origin_phys->ds_creation_txg;
			dmu_buf_rele(origin_bonus, FTAG);
		}

		/*
		 * Publish dd as the dbuf's user data; if another thread
		 * won the race, discard our copy and use the winner's.
		 */
		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
		    dsl_dir_evict);
		if (winner) {
			if (dd->dd_parent)
				dsl_dir_rele(dd->dd_parent, dd);
			mutex_destroy(&dd->dd_lock);
			kmem_free(dd, sizeof (dsl_dir_t));
			dd = winner;
		} else {
			/* instantiate-to-evict hold; dropped in dsl_dir_evict */
			spa_open_ref(dp->dp_spa, dd);
		}
	}

	/*
	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
	 * holds on the spa. We need the open-to-close holds because
	 * otherwise the spa_refcnt wouldn't change when we open a
	 * dir which the spa also has open, so we could incorrectly
	 * think it was OK to unload/export/destroy the pool. We need
	 * the instantiate-to-evict hold because the dsl_dir_t has a
	 * pointer to the dd_pool, which has a pointer to the spa_t.
	 */
	spa_open_ref(dp->dp_spa, tag);
	ASSERT3P(dd->dd_pool, ==, dp);
	ASSERT3U(dd->dd_object, ==, ddobj);
	ASSERT3P(dd->dd_dbuf, ==, dbuf);
	*ddp = dd;
	return (0);

errout:
	if (dd->dd_parent)
		dsl_dir_rele(dd->dd_parent, dd);
	mutex_destroy(&dd->dd_lock);
	kmem_free(dd, sizeof (dsl_dir_t));
	dmu_buf_rele(dbuf, tag);
	return (err);
}
191
/*
 * Release a hold on dd obtained from dsl_dir_hold()/dsl_dir_hold_obj().
 * The spa hold is dropped before the dbuf hold: releasing the dbuf may
 * trigger dsl_dir_evict() and free dd, so dd must not be touched after.
 */
void
dsl_dir_rele(dsl_dir_t *dd, void *tag)
{
	dprintf_dd(dd, "%s\n", "");
	spa_close(dd->dd_pool->dp_spa, tag);
	dmu_buf_rele(dd->dd_dbuf, tag);
}
199
/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
/*
 * Write dd's full name ("pool/parent/.../child") into buf by recursing
 * to the root and appending each component on the way back down.
 */
void
dsl_dir_name(dsl_dir_t *dd, char *buf)
{
	if (dd->dd_parent) {
		dsl_dir_name(dd->dd_parent, buf);
		(void) strcat(buf, "/");
	} else {
		buf[0] = '\0';
	}
	if (!MUTEX_HELD(&dd->dd_lock)) {
		/*
		 * recursive mutex so that we can use
		 * dprintf_dd() with dd_lock held
		 */
		mutex_enter(&dd->dd_lock);
		(void) strcat(buf, dd->dd_myname);
		mutex_exit(&dd->dd_lock);
	} else {
		(void) strcat(buf, dd->dd_myname);
	}
}
222
223 /* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
224 int
225 dsl_dir_namelen(dsl_dir_t *dd)
226 {
227 int result = 0;
228
229 if (dd->dd_parent) {
230 /* parent's name + 1 for the "/" */
231 result = dsl_dir_namelen(dd->dd_parent) + 1;
232 }
233
234 if (!MUTEX_HELD(&dd->dd_lock)) {
235 /* see dsl_dir_name */
236 mutex_enter(&dd->dd_lock);
237 result += strlen(dd->dd_myname);
238 mutex_exit(&dd->dd_lock);
239 } else {
240 result += strlen(dd->dd_myname);
241 }
242
243 return (result);
244 }
245
/*
 * Copy the first component of path (up to but not including the next
 * '/' or '@' separator) into component, which must be at least
 * MAXNAMELEN bytes.  On return, *nextp points just past a '/'
 * separator, at an '@' separator, or is NULL when the whole path was
 * consumed.  Returns 0, or ENOENT/EINVAL/ENAMETOOLONG on bad input.
 */
static int
getcomponent(const char *path, char *component, const char **nextp)
{
	char *p;

	if ((path == NULL) || (path[0] == '\0'))
		return (SET_ERROR(ENOENT));
	/* This would be a good place to reserve some namespace... */
	p = strpbrk(path, "/@");
	if (p && (p[1] == '/' || p[1] == '@')) {
		/* two separators in a row */
		return (SET_ERROR(EINVAL));
	}
	if (p == NULL || p == path) {
		/*
		 * if the first thing is an @ or /, it had better be an
		 * @ and it had better not have any more ats or slashes,
		 * and it had better have something after the @.
		 */
		if (p != NULL &&
		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
			return (SET_ERROR(EINVAL));
		if (strlen(path) >= MAXNAMELEN)
			return (SET_ERROR(ENAMETOOLONG));
		(void) strcpy(component, path);
		p = NULL;
	} else if (p[0] == '/') {
		if (p - path >= MAXNAMELEN)
			return (SET_ERROR(ENAMETOOLONG));
		/* copy the component and NUL-terminate it ourselves */
		(void) strncpy(component, path, p - path);
		component[p - path] = '\0';
		p++;
	} else if (p[0] == '@') {
		/*
		 * if the next separator is an @, there better not be
		 * any more slashes.
		 */
		if (strchr(path, '/'))
			return (SET_ERROR(EINVAL));
		if (p - path >= MAXNAMELEN)
			return (SET_ERROR(ENAMETOOLONG));
		(void) strncpy(component, path, p - path);
		component[p - path] = '\0';
	} else {
		panic("invalid p=%p", (void *)p);
	}
	*nextp = p;
	return (0);
}
295
296 /*
297 * Return the dsl_dir_t, and possibly the last component which couldn't
298 * be found in *tail. The name must be in the specified dsl_pool_t. This
299 * thread must hold the dp_config_rwlock for the pool. Returns NULL if the
300 * path is bogus, or if tail==NULL and we couldn't parse the whole name.
301 * (*tail)[0] == '@' means that the last component is a snapshot.
302 */
/*
 * See block comment above: walk name component-by-component from the
 * pool's root dir, holding each child in turn.  On success *ddp holds
 * the deepest dir found and *tailp (if requested) points at any
 * unresolved final component.
 */
int
dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
    dsl_dir_t **ddp, const char **tailp)
{
	char *buf;
	const char *spaname, *next, *nextnext = NULL;
	int err;
	dsl_dir_t *dd;
	uint64_t ddobj;

	buf = kmem_alloc(MAXNAMELEN, KM_PUSHPAGE);
	err = getcomponent(name, buf, &next);
	if (err != 0)
		goto error;

	/* Make sure the name is in the specified pool. */
	spaname = spa_name(dp->dp_spa);
	if (strcmp(buf, spaname) != 0) {
		err = SET_ERROR(EINVAL);
		goto error;
	}

	ASSERT(dsl_pool_config_held(dp));

	err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
	if (err != 0) {
		goto error;
	}

	while (next != NULL) {
		dsl_dir_t *child_ds;
		err = getcomponent(next, buf, &nextnext);
		if (err != 0)
			break;
		ASSERT(next[0] != '\0');
		/* stop descending at a snapshot component */
		if (next[0] == '@')
			break;
		dprintf("looking up %s in obj%lld\n",
		    buf, dd->dd_phys->dd_child_dir_zapobj);

		err = zap_lookup(dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj,
		    buf, sizeof (ddobj), 1, &ddobj);
		if (err != 0) {
			/* child not found: stop here with dd held, err == 0 */
			if (err == ENOENT)
				err = 0;
			break;
		}

		/* hold the child before releasing the parent */
		err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
		if (err != 0)
			break;
		dsl_dir_rele(dd, tag);
		dd = child_ds;
		next = nextnext;
	}

	if (err != 0) {
		dsl_dir_rele(dd, tag);
		goto error;
	}

	/*
	 * It's an error if there's more than one component left, or
	 * tailp==NULL and there's any component left.
	 */
	if (next != NULL &&
	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
		/* bad path name */
		dsl_dir_rele(dd, tag);
		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
		err = SET_ERROR(ENOENT);
	}
	if (tailp != NULL)
		*tailp = next;
	*ddp = dd;
error:
	kmem_free(buf, MAXNAMELEN);
	return (err);
}
383
/*
 * Create a new DSL directory object in the MOS (syncing context only).
 * The new dir is linked under pds's child-dir ZAP, or registered as the
 * pool's root dataset when pds is NULL.  Returns the new object number.
 */
uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
    dmu_tx_t *tx)
{
	objset_t *mos = dp->dp_meta_objset;
	uint64_t ddobj;
	dsl_dir_phys_t *ddphys;
	dmu_buf_t *dbuf;

	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
	if (pds) {
		VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
		    name, sizeof (uint64_t), 1, &ddobj, tx));
	} else {
		/* it's the root dir */
		VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
	}
	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
	dmu_buf_will_dirty(dbuf, tx);
	ddphys = dbuf->db_data;

	/* initialize the on-disk phys: creation time, parent, and ZAPs */
	ddphys->dd_creation_time = gethrestime_sec();
	if (pds)
		ddphys->dd_parent_obj = pds->dd_object;
	ddphys->dd_props_zapobj = zap_create(mos,
	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
	ddphys->dd_child_dir_zapobj = zap_create(mos,
	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
	dmu_buf_rele(dbuf, FTAG);

	return (ddobj);
}
420
421 boolean_t
422 dsl_dir_is_clone(dsl_dir_t *dd)
423 {
424 return (dd->dd_phys->dd_origin_obj &&
425 (dd->dd_pool->dp_origin_snap == NULL ||
426 dd->dd_phys->dd_origin_obj !=
427 dd->dd_pool->dp_origin_snap->ds_object));
428 }
429
/*
 * Populate nv with dd's space-accounting properties (used, quota,
 * reservation, compressratio, usedby* breakdown, and origin for
 * clones).  Snapshot of state under dd_lock; origin name is looked up
 * after dropping the lock since it takes a dataset hold.
 */
void
dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
{
	mutex_enter(&dd->dd_lock);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
	    dd->dd_phys->dd_used_bytes);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
	    dd->dd_phys->dd_reserved);
	/* ratio is expressed as a percentage; 100 means incompressible */
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
	    dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
	    (dd->dd_phys->dd_uncompressed_bytes * 100 /
	    dd->dd_phys->dd_compressed_bytes));
	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_LOGICALUSED,
	    dd->dd_phys->dd_uncompressed_bytes);
	if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
		    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
		    dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
		    dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]);
		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
		    dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] +
		    dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]);
	}
	mutex_exit(&dd->dd_lock);

	if (dsl_dir_is_clone(dd)) {
		dsl_dataset_t *ds;
		char buf[MAXNAMELEN];

		VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
		    dd->dd_phys->dd_origin_obj, FTAG, &ds));
		dsl_dataset_name(ds, buf);
		dsl_dataset_rele(ds, FTAG);
		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
	}
}
469
/*
 * Mark dd dirty in tx's txg so dsl_dir_sync() will run for it.  Takes
 * an extra dbuf hold the first time the dir is dirtied in this txg;
 * the matching release is in dsl_dir_sync().
 */
void
dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dd->dd_pool;

	ASSERT(dd->dd_phys);

	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
		/* up the hold count until we can be written out */
		dmu_buf_add_ref(dd->dd_dbuf, dd);
	}
}
482
483 static int64_t
484 parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
485 {
486 uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
487 uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
488 return (new_accounted - old_accounted);
489 }
490
/*
 * Per-txg sync for a dirty dsl_dir: clear this txg's space-to-write
 * estimate and drop the hold taken by dsl_dir_dirty().
 */
void
dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
{
	ASSERT(dmu_tx_is_syncing(tx));

	mutex_enter(&dd->dd_lock);
	/* all tempreserves for this txg must have been cleared by now */
	ASSERT0(dd->dd_tempreserved[tx->tx_txg&TXG_MASK]);
	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
	mutex_exit(&dd->dd_lock);

	/* release the hold from dsl_dir_dirty */
	dmu_buf_rele(dd->dd_dbuf, dd);
}
506
507 static uint64_t
508 dsl_dir_space_towrite(dsl_dir_t *dd)
509 {
510 uint64_t space = 0;
511 int i;
512
513 ASSERT(MUTEX_HELD(&dd->dd_lock));
514
515 for (i = 0; i < TXG_SIZE; i++) {
516 space += dd->dd_space_towrite[i&TXG_MASK];
517 ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
518 }
519 return (space);
520 }
521
522 /*
523 * How much space would dd have available if ancestor had delta applied
524 * to it? If ondiskonly is set, we're only interested in what's
525 * on-disk, not estimated pending changes.
526 */
/*
 * See block comment above.  Recurses to the root, combining each
 * level's quota headroom and reservations; `delta` is applied only at
 * `ancestor` on the way up.  UINT64_MAX means "no limit at this level".
 */
uint64_t
dsl_dir_space_available(dsl_dir_t *dd,
    dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
{
	uint64_t parentspace, myspace, quota, used;

	/*
	 * If there are no restrictions otherwise, assume we have
	 * unlimited space available.
	 */
	quota = UINT64_MAX;
	parentspace = UINT64_MAX;

	if (dd->dd_parent != NULL) {
		parentspace = dsl_dir_space_available(dd->dd_parent,
		    ancestor, delta, ondiskonly);
	}

	mutex_enter(&dd->dd_lock);
	if (dd->dd_phys->dd_quota != 0)
		quota = dd->dd_phys->dd_quota;
	used = dd->dd_phys->dd_used_bytes;
	if (!ondiskonly)
		used += dsl_dir_space_towrite(dd);

	if (dd->dd_parent == NULL) {
		/* at the root, the pool size is the ultimate quota */
		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
		quota = MIN(quota, poolsize);
	}

	if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
		/*
		 * We have some space reserved, in addition to what our
		 * parent gave us.
		 */
		parentspace += dd->dd_phys->dd_reserved - used;
	}

	if (dd == ancestor) {
		ASSERT(delta <= 0);
		ASSERT(used >= -delta);
		used += delta;
		if (parentspace != UINT64_MAX)
			parentspace -= delta;
	}

	if (used > quota) {
		/* over quota */
		myspace = 0;
	} else {
		/*
		 * the lesser of the space provided by our parent and
		 * the space left in our quota
		 */
		myspace = MIN(parentspace, quota - used);
	}

	mutex_exit(&dd->dd_lock);

	return (myspace);
}
588
/*
 * One entry of a temporary space reservation.  A reservation (the
 * tr_cookie of dsl_dir_tempreserve_space()) is a list of these: one
 * per dsl_dir in the ancestry, plus special entries for the ARC
 * (neither tr_dp nor tr_ds set) and the pool (tr_dp set).
 */
struct tempreserve {
	list_node_t tr_node;
	dsl_pool_t *tr_dp;	/* non-NULL for the pool-wide entry */
	dsl_dir_t *tr_ds;	/* non-NULL for per-dir entries */
	uint64_t tr_size;	/* bytes reserved by this entry */
};
595
/*
 * Reserve `asize` bytes in dd for tx's txg, checking quotas (and, on
 * the first level, the dataset's refquota) against used-on-disk plus
 * all in-flight estimates.  Recurses up the ancestry with the portion
 * of the reservation visible to each parent, appending a tempreserve
 * entry to tr_list at every level.  Returns 0, or EDQUOT/ENOSPC/
 * ERESTART (via SET_ERROR) when the reservation cannot be granted.
 */
static int
dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
    boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
    dmu_tx_t *tx, boolean_t first)
{
	uint64_t txg = tx->tx_txg;
	uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
	uint64_t deferred = 0;
	struct tempreserve *tr;
	int retval = EDQUOT;
	int txgidx = txg & TXG_MASK;
	int i;
	uint64_t ref_rsrv = 0;

	ASSERT3U(txg, !=, 0);
	ASSERT3S(asize, >, 0);

	mutex_enter(&dd->dd_lock);

	/*
	 * Check against the dsl_dir's quota. We don't add in the delta
	 * when checking for over-quota because they get one free hit.
	 */
	est_inflight = dsl_dir_space_towrite(dd);
	for (i = 0; i < TXG_SIZE; i++)
		est_inflight += dd->dd_tempreserved[i];
	used_on_disk = dd->dd_phys->dd_used_bytes;

	/*
	 * On the first iteration, fetch the dataset's used-on-disk and
	 * refreservation values. Also, if checkrefquota is set, test if
	 * allocating this space would exceed the dataset's refquota.
	 */
	if (first && tx->tx_objset) {
		int error;
		dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;

		error = dsl_dataset_check_quota(ds, checkrefquota,
		    asize, est_inflight, &used_on_disk, &ref_rsrv);
		if (error) {
			mutex_exit(&dd->dd_lock);
			return (error);
		}
	}

	/*
	 * If this transaction will result in a net free of space,
	 * we want to let it through.
	 */
	if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
		quota = UINT64_MAX;
	else
		quota = dd->dd_phys->dd_quota;

	/*
	 * Adjust the quota against the actual pool size at the root
	 * minus any outstanding deferred frees.
	 * To ensure that it's possible to remove files from a full
	 * pool without inducing transient overcommits, we throttle
	 * netfree transactions against a quota that is slightly larger,
	 * but still within the pool's allocation slop. In cases where
	 * we're very close to full, this will allow a steady trickle of
	 * removes to get through.
	 */
	if (dd->dd_parent == NULL) {
		spa_t *spa = dd->dd_pool->dp_spa;
		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
		deferred = metaslab_class_get_deferred(spa_normal_class(spa));
		if (poolsize - deferred < quota) {
			quota = poolsize - deferred;
			retval = ENOSPC;
		}
	}

	/*
	 * If they are requesting more space, and our current estimate
	 * is over quota, they get to try again unless the actual
	 * on-disk is over quota and there are no pending changes (which
	 * may free up space for us).
	 */
	if (used_on_disk + est_inflight >= quota) {
		if (est_inflight > 0 || used_on_disk < quota ||
		    (retval == ENOSPC && used_on_disk < quota + deferred))
			retval = ERESTART;
		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
		    "quota=%lluK tr=%lluK err=%d\n",
		    used_on_disk>>10, est_inflight>>10,
		    quota>>10, asize>>10, retval);
		mutex_exit(&dd->dd_lock);
		return (SET_ERROR(retval));
	}

	/* We need to up our estimated delta before dropping dd_lock */
	dd->dd_tempreserved[txgidx] += asize;

	parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
	    asize - ref_rsrv);
	mutex_exit(&dd->dd_lock);

	/* record this level so dsl_dir_tempreserve_clear() can undo it */
	tr = kmem_zalloc(sizeof (struct tempreserve), KM_PUSHPAGE);
	tr->tr_ds = dd;
	tr->tr_size = asize;
	list_insert_tail(tr_list, tr);

	/* see if it's OK with our parent */
	if (dd->dd_parent && parent_rsrv) {
		boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);

		return (dsl_dir_tempreserve_impl(dd->dd_parent,
		    parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
	} else {
		return (0);
	}
}
710
711 /*
712 * Reserve space in this dsl_dir, to be used in this tx's txg.
713 * After the space has been dirtied (and dsl_dir_willuse_space()
714 * has been called), the reservation should be canceled, using
715 * dsl_dir_tempreserve_clear().
716 */
/*
 * See block comment above.  Builds the tr_list cookie: an ARC entry
 * (throttles on memory), a pool-wide entry (write throttle), then the
 * per-dir entries via dsl_dir_tempreserve_impl().  On any failure the
 * partial list is torn down before returning.
 */
int
dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
    uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
{
	int err;
	list_t *tr_list;

	if (asize == 0) {
		*tr_cookiep = NULL;
		return (0);
	}

	tr_list = kmem_alloc(sizeof (list_t), KM_PUSHPAGE);
	list_create(tr_list, sizeof (struct tempreserve),
	    offsetof(struct tempreserve, tr_node));
	ASSERT3S(asize, >, 0);
	ASSERT3S(fsize, >=, 0);

	err = arc_tempreserve_space(lsize, tx->tx_txg);
	if (err == 0) {
		struct tempreserve *tr;

		/* ARC entry: neither tr_dp nor tr_ds set */
		tr = kmem_zalloc(sizeof (struct tempreserve), KM_PUSHPAGE);
		tr->tr_size = lsize;
		list_insert_tail(tr_list, tr);

		err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
	} else {
		if (err == EAGAIN) {
			/* ARC is busy; back off and let the txg advance */
			txg_delay(dd->dd_pool, tx->tx_txg, 1);
			err = SET_ERROR(ERESTART);
		}
		dsl_pool_memory_pressure(dd->dd_pool);
	}

	if (err == 0) {
		struct tempreserve *tr;

		/* pool-wide entry: tr_dp set */
		tr = kmem_zalloc(sizeof (struct tempreserve), KM_PUSHPAGE);
		tr->tr_dp = dd->dd_pool;
		tr->tr_size = asize;
		list_insert_tail(tr_list, tr);

		err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
		    FALSE, asize > usize, tr_list, tx, TRUE);
	}

	if (err != 0)
		dsl_dir_tempreserve_clear(tr_list, tx);
	else
		*tr_cookiep = tr_list;

	return (err);
}
771
772 /*
773 * Clear a temporary reservation that we previously made with
774 * dsl_dir_tempreserve_space().
775 */
/*
 * See block comment above.  Walks the tr_list, undoing each entry
 * according to which kind it is (pool, per-dir, or ARC), then frees
 * the list itself.  A NULL cookie (asize == 0 reservation) is a no-op.
 */
void
dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
{
	int txgidx = tx->tx_txg & TXG_MASK;
	list_t *tr_list = tr_cookie;
	struct tempreserve *tr;

	ASSERT3U(tx->tx_txg, !=, 0);

	if (tr_cookie == NULL)
		return;

	while ((tr = list_head(tr_list))) {
		if (tr->tr_dp) {
			dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx);
		} else if (tr->tr_ds) {
			mutex_enter(&tr->tr_ds->dd_lock);
			ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
			    tr->tr_size);
			tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
			mutex_exit(&tr->tr_ds->dd_lock);
		} else {
			arc_tempreserve_clear(tr->tr_size);
		}
		list_remove(tr_list, tr);
		kmem_free(tr, sizeof (struct tempreserve));
	}

	kmem_free(tr_list, sizeof (list_t));
}
806
/*
 * Add `space` to dd's space-to-write estimate for tx's txg and
 * propagate the parent-visible portion (per parent_delta()) up the
 * ancestry.  Negative `space` only affects the propagation, not the
 * per-txg estimate.
 */
static void
dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
{
	int64_t parent_space;
	uint64_t est_used;

	mutex_enter(&dd->dd_lock);
	if (space > 0)
		dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;

	est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes;
	parent_space = parent_delta(dd, est_used, space);
	mutex_exit(&dd->dd_lock);

	/* Make sure that we clean up dd_space_to* */
	dsl_dir_dirty(dd, tx);

	/* XXX this is potentially expensive and unnecessary... */
	if (parent_space && dd->dd_parent)
		dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx);
}
828
829 /*
830 * Call in open context when we think we're going to write/free space,
831 * eg. when dirtying data. Be conservative (ie. OK to write less than
832 * this or free more than this, but don't write more or free less).
833 */
/*
 * See block comment above: record the estimate pool-wide first, then
 * walk it up this dir's ancestry.
 */
void
dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
{
	dsl_pool_willuse_space(dd->dd_pool, space, tx);
	dsl_dir_willuse_space_impl(dd, space, tx);
}
840
841 /* call from syncing context when we actually write/free space for this dd */
/*
 * Apply an actual space delta (used/compressed/uncompressed bytes, all
 * signed) of the given breakdown type to dd, dirtying its phys, and
 * propagate the reservation-adjusted portion to the parent as
 * DD_USED_CHILD / DD_USED_CHILD_RSRV.  Syncing context only.
 */
void
dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
    int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
{
	int64_t accounted_delta;
	/* caller may already hold dd_lock (e.g. reservation sync path) */
	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(type < DD_USED_NUM);

	if (needlock)
		mutex_enter(&dd->dd_lock);
	accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
	ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used);
	ASSERT(compressed >= 0 ||
	    dd->dd_phys->dd_compressed_bytes >= -compressed);
	ASSERT(uncompressed >= 0 ||
	    dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_used_bytes += used;
	dd->dd_phys->dd_uncompressed_bytes += uncompressed;
	dd->dd_phys->dd_compressed_bytes += compressed;

	if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
		ASSERT(used > 0 ||
		    dd->dd_phys->dd_used_breakdown[type] >= -used);
		dd->dd_phys->dd_used_breakdown[type] += used;
#ifdef DEBUG
		{
			/* breakdown entries must always sum to dd_used_bytes */
			dd_used_t t;
			uint64_t u = 0;
			for (t = 0; t < DD_USED_NUM; t++)
				u += dd->dd_phys->dd_used_breakdown[t];
			ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes);
		}
#endif
	}
	if (needlock)
		mutex_exit(&dd->dd_lock);

	if (dd->dd_parent != NULL) {
		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
		    accounted_delta, compressed, uncompressed, tx);
		/* the reserved-but-now-used portion shifts categories */
		dsl_dir_transfer_space(dd->dd_parent,
		    used - accounted_delta,
		    DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
	}
}
890
/*
 * Move `delta` bytes of dd's used-space breakdown from oldtype to
 * newtype (total dd_used_bytes is unchanged).  No-op when delta is 0
 * or the pool predates DD_FLAG_USED_BREAKDOWN.  Syncing context only.
 */
void
dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
    dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
{
	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(oldtype < DD_USED_NUM);
	ASSERT(newtype < DD_USED_NUM);

	if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
		return;

	if (needlock)
		mutex_enter(&dd->dd_lock);
	ASSERT(delta > 0 ?
	    dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
	    dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
	ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
	dmu_buf_will_dirty(dd->dd_dbuf, tx);
	dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
	dd->dd_phys->dd_used_breakdown[newtype] += delta;
	if (needlock)
		mutex_exit(&dd->dd_lock);
}
916
/* Arguments for the quota/reservation sync tasks below. */
typedef struct dsl_dir_set_qr_arg {
	const char *ddsqra_name;	/* name of the target dataset */
	zprop_source_t ddsqra_source;	/* property source (local, etc.) */
	uint64_t ddsqra_value;		/* new quota/reservation value */
} dsl_dir_set_qr_arg_t;
922
/*
 * Sync-task check function for setting the quota property: predict the
 * effective new value and fail with ENOSPC if it would drop below the
 * dir's reservation or current usage.  A quota of 0 (none) always
 * passes.
 */
static int
dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
{
	dsl_dir_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	int error;
	uint64_t towrite, newval;

	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
	if (error != 0)
		return (error);

	error = dsl_prop_predict(ds->ds_dir, "quota",
	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	if (newval == 0) {
		dsl_dataset_rele(ds, FTAG);
		return (0);
	}

	mutex_enter(&ds->ds_dir->dd_lock);
	/*
	 * If we are doing the preliminary check in open context, and
	 * there are pending changes, then don't fail it, since the
	 * pending changes could under-estimate the amount of space to be
	 * freed up.
	 */
	towrite = dsl_dir_space_towrite(ds->ds_dir);
	if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
	    (newval < ds->ds_dir->dd_phys->dd_reserved ||
	    newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) {
		error = SET_ERROR(ENOSPC);
	}
	mutex_exit(&ds->ds_dir->dd_lock);
	dsl_dataset_rele(ds, FTAG);
	return (error);
}
965
/*
 * Sync-task function for setting the quota property: record the
 * property value, then re-read the effective value and store it in the
 * dir's phys so space accounting sees it immediately.
 */
static void
dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dir_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	uint64_t newval;

	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));

	dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
	    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
	    &ddsqra->ddsqra_value, tx);

	/* re-read in case the effective value differs (e.g. inherited) */
	VERIFY0(dsl_prop_get_int_ds(ds,
	    zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));

	dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
	mutex_enter(&ds->ds_dir->dd_lock);
	ds->ds_dir->dd_phys->dd_quota = newval;
	mutex_exit(&ds->ds_dir->dd_lock);
	dsl_dataset_rele(ds, FTAG);
}
989
990 int
991 dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
992 {
993 dsl_dir_set_qr_arg_t ddsqra;
994
995 ddsqra.ddsqra_name = ddname;
996 ddsqra.ddsqra_source = source;
997 ddsqra.ddsqra_value = quota;
998
999 return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
1000 dsl_dir_set_quota_sync, &ddsqra, 0));
1001 }
1002
/*
 * Sync-task check function for setting the reservation property: fail
 * with ENOSPC if the additional reserved space would exceed what's
 * available from the parent, or if the new reservation would exceed
 * this dir's own quota.  Open-context estimates are unreliable, so the
 * real check only runs in syncing context.
 */
int
dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
{
	dsl_dir_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	dsl_dir_t *dd;
	uint64_t newval, used, avail;
	int error;

	error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
	if (error != 0)
		return (error);
	dd = ds->ds_dir;

	/*
	 * If we are doing the preliminary check in open context, the
	 * space estimates may be inaccurate.
	 */
	if (!dmu_tx_is_syncing(tx)) {
		dsl_dataset_rele(ds, FTAG);
		return (0);
	}

	error = dsl_prop_predict(ds->ds_dir,
	    zfs_prop_to_name(ZFS_PROP_RESERVATION),
	    ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
	if (error != 0) {
		dsl_dataset_rele(ds, FTAG);
		return (error);
	}

	mutex_enter(&dd->dd_lock);
	used = dd->dd_phys->dd_used_bytes;
	mutex_exit(&dd->dd_lock);

	if (dd->dd_parent) {
		avail = dsl_dir_space_available(dd->dd_parent,
		    NULL, 0, FALSE);
	} else {
		avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
	}

	if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) {
		/* delta is the extra space the new reservation pins */
		uint64_t delta = MAX(used, newval) -
		    MAX(used, dd->dd_phys->dd_reserved);

		if (delta > avail ||
		    (dd->dd_phys->dd_quota > 0 &&
		    newval > dd->dd_phys->dd_quota))
			error = SET_ERROR(ENOSPC);
	}

	dsl_dataset_rele(ds, FTAG);
	return (error);
}
1059
/*
 * Apply a new reservation value to dd's phys and charge the change in
 * pinned space (per the MAX(used, reserved) accounting) to ancestors
 * as DD_USED_CHILD_RSRV.  Syncing context only.
 */
void
dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
{
	uint64_t used;
	int64_t delta;

	dmu_buf_will_dirty(dd->dd_dbuf, tx);

	mutex_enter(&dd->dd_lock);
	used = dd->dd_phys->dd_used_bytes;
	delta = MAX(used, value) - MAX(used, dd->dd_phys->dd_reserved);
	dd->dd_phys->dd_reserved = value;

	if (dd->dd_parent != NULL) {
		/* Roll up this additional usage into our ancestors */
		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
		    delta, 0, 0, tx);
	}
	mutex_exit(&dd->dd_lock);
}
1080
/*
 * Sync-task function for setting the reservation property: record the
 * property value, re-read the effective value, and apply it via
 * dsl_dir_set_reservation_sync_impl().
 */
static void
dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dir_set_qr_arg_t *ddsqra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dataset_t *ds;
	uint64_t newval;

	VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));

	dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION),
	    ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
	    &ddsqra->ddsqra_value, tx);

	/* re-read in case the effective value differs (e.g. inherited) */
	VERIFY0(dsl_prop_get_int_ds(ds,
	    zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));

	dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
	dsl_dataset_rele(ds, FTAG);
}
1101
1102 int
1103 dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
1104 uint64_t reservation)
1105 {
1106 dsl_dir_set_qr_arg_t ddsqra;
1107
1108 ddsqra.ddsqra_name = ddname;
1109 ddsqra.ddsqra_source = source;
1110 ddsqra.ddsqra_value = reservation;
1111
1112 return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
1113 dsl_dir_set_reservation_sync, &ddsqra, 0));
1114 }
1115
1116 static dsl_dir_t *
1117 closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1118 {
1119 for (; ds1; ds1 = ds1->dd_parent) {
1120 dsl_dir_t *dd;
1121 for (dd = ds2; dd; dd = dd->dd_parent) {
1122 if (ds1 == dd)
1123 return (dd);
1124 }
1125 }
1126 return (NULL);
1127 }
1128
/*
 * If delta is applied to dd, how much of that delta would be applied to
 * ancestor? Syncing context only.
 *
 * Recurses up the dd_parent chain, clamping the delta at each level via
 * parent_delta() (which accounts for each dir's reservation) until the
 * given ancestor is reached.
 */
static int64_t
would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
{
	if (dd == ancestor)
		return (delta);

	mutex_enter(&dd->dd_lock);
	delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta);
	mutex_exit(&dd->dd_lock);
	return (would_change(dd->dd_parent, delta, ancestor));
}
1144
/* Arguments for the dsl_dir_rename sync task. */
typedef struct dsl_dir_rename_arg {
	const char *ddra_oldname;	/* current full name of the dir */
	const char *ddra_newname;	/* desired full name of the dir */
} dsl_dir_rename_arg_t;
1149
1150 /* ARGSUSED */
1151 static int
1152 dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
1153 {
1154 int *deltap = arg;
1155 char namebuf[MAXNAMELEN];
1156
1157 dsl_dataset_name(ds, namebuf);
1158
1159 if (strlen(namebuf) + *deltap >= MAXNAMELEN)
1160 return (SET_ERROR(ENAMETOOLONG));
1161 return (0);
1162 }
1163
1164 static int
1165 dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
1166 {
1167 dsl_dir_rename_arg_t *ddra = arg;
1168 dsl_pool_t *dp = dmu_tx_pool(tx);
1169 dsl_dir_t *dd, *newparent;
1170 const char *mynewname;
1171 int error;
1172 int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
1173
1174 /* target dir should exist */
1175 error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
1176 if (error != 0)
1177 return (error);
1178
1179 /* new parent should exist */
1180 error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
1181 &newparent, &mynewname);
1182 if (error != 0) {
1183 dsl_dir_rele(dd, FTAG);
1184 return (error);
1185 }
1186
1187 /* can't rename to different pool */
1188 if (dd->dd_pool != newparent->dd_pool) {
1189 dsl_dir_rele(newparent, FTAG);
1190 dsl_dir_rele(dd, FTAG);
1191 return (SET_ERROR(ENXIO));
1192 }
1193
1194 /* new name should not already exist */
1195 if (mynewname == NULL) {
1196 dsl_dir_rele(newparent, FTAG);
1197 dsl_dir_rele(dd, FTAG);
1198 return (SET_ERROR(EEXIST));
1199 }
1200
1201 /* if the name length is growing, validate child name lengths */
1202 if (delta > 0) {
1203 error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
1204 &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
1205 if (error != 0) {
1206 dsl_dir_rele(newparent, FTAG);
1207 dsl_dir_rele(dd, FTAG);
1208 return (error);
1209 }
1210 }
1211
1212 if (newparent != dd->dd_parent) {
1213 /* is there enough space? */
1214 uint64_t myspace =
1215 MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
1216
1217 /* no rename into our descendant */
1218 if (closest_common_ancestor(dd, newparent) == dd) {
1219 dsl_dir_rele(newparent, FTAG);
1220 dsl_dir_rele(dd, FTAG);
1221 return (SET_ERROR(EINVAL));
1222 }
1223
1224 error = dsl_dir_transfer_possible(dd->dd_parent,
1225 newparent, myspace);
1226 if (error != 0) {
1227 dsl_dir_rele(newparent, FTAG);
1228 dsl_dir_rele(dd, FTAG);
1229 return (error);
1230 }
1231 }
1232
1233 dsl_dir_rele(newparent, FTAG);
1234 dsl_dir_rele(dd, FTAG);
1235 return (0);
1236 }
1237
/*
 * Sync function for dsl_dir_rename: move/rename dsl_dir "ddra_oldname"
 * to "ddra_newname".  The check function already validated everything,
 * so the holds here use VERIFY0.  Statement order below is significant:
 * space accounting is transferred before the zap/parent pointers are
 * rewired, and the old name is logged before it changes.
 */
static void
dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dir_rename_arg_t *ddra = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *dd, *newparent;
	const char *mynewname;
	int error;
	objset_t *mos = dp->dp_meta_objset;

	VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
	VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
	    &mynewname));

	/* Log this before we change the name. */
	spa_history_log_internal_dd(dd, "rename", tx,
	    "-> %s", ddra->ddra_newname);

	if (newparent != dd->dd_parent) {
		/*
		 * Moving to a different parent: transfer this dir's
		 * used/compressed/uncompressed space from the old parent's
		 * accounting to the new parent's.
		 */
		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
		    -dd->dd_phys->dd_used_bytes,
		    -dd->dd_phys->dd_compressed_bytes,
		    -dd->dd_phys->dd_uncompressed_bytes, tx);
		dsl_dir_diduse_space(newparent, DD_USED_CHILD,
		    dd->dd_phys->dd_used_bytes,
		    dd->dd_phys->dd_compressed_bytes,
		    dd->dd_phys->dd_uncompressed_bytes, tx);

		/*
		 * Any reservation in excess of actual usage is tracked
		 * separately (DD_USED_CHILD_RSRV); transfer that too.
		 */
		if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) {
			uint64_t unused_rsrv = dd->dd_phys->dd_reserved -
			    dd->dd_phys->dd_used_bytes;

			dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
			    -unused_rsrv, 0, 0, tx);
			dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
			    unused_rsrv, 0, 0, tx);
		}
	}

	dmu_buf_will_dirty(dd->dd_dbuf, tx);

	/* remove from old parent zapobj */
	error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
	    dd->dd_myname, tx);
	ASSERT0(error);

	/*
	 * Swap in the new name and re-parent: drop our hold on the old
	 * parent, point dd_parent_obj at the new one, and take a fresh
	 * hold on it.
	 */
	(void) strcpy(dd->dd_myname, mynewname);
	dsl_dir_rele(dd->dd_parent, dd);
	dd->dd_phys->dd_parent_obj = newparent->dd_object;
	VERIFY0(dsl_dir_hold_obj(dp,
	    newparent->dd_object, NULL, dd, &dd->dd_parent));

	/* add to new parent zapobj */
	VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
	    dd->dd_myname, 8, 1, &dd->dd_object, tx));

	/* Inherited properties may change under the new parent. */
	dsl_prop_notify_all(dd);

	dsl_dir_rele(newparent, FTAG);
	dsl_dir_rele(dd, FTAG);
}
1299
1300 int
1301 dsl_dir_rename(const char *oldname, const char *newname)
1302 {
1303 dsl_dir_rename_arg_t ddra;
1304
1305 ddra.ddra_oldname = oldname;
1306 ddra.ddra_newname = newname;
1307
1308 return (dsl_sync_task(oldname,
1309 dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3));
1310 }
1311
1312 int
1313 dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
1314 {
1315 dsl_dir_t *ancestor;
1316 int64_t adelta;
1317 uint64_t avail;
1318
1319 ancestor = closest_common_ancestor(sdd, tdd);
1320 adelta = would_change(sdd, -space, ancestor);
1321 avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
1322 if (avail < space)
1323 return (SET_ERROR(ENOSPC));
1324
1325 return (0);
1326 }
1327
1328 timestruc_t
1329 dsl_dir_snap_cmtime(dsl_dir_t *dd)
1330 {
1331 timestruc_t t;
1332
1333 mutex_enter(&dd->dd_lock);
1334 t = dd->dd_snap_cmtime;
1335 mutex_exit(&dd->dd_lock);
1336
1337 return (t);
1338 }
1339
1340 void
1341 dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
1342 {
1343 timestruc_t t;
1344
1345 gethrestime(&t);
1346 mutex_enter(&dd->dd_lock);
1347 dd->dd_snap_cmtime = t;
1348 mutex_exit(&dd->dd_lock);
1349 }
1350
1351 #if defined(_KERNEL) && defined(HAVE_SPL)
1352 EXPORT_SYMBOL(dsl_dir_set_quota);
1353 EXPORT_SYMBOL(dsl_dir_set_reservation);
1354 #endif