4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
27 #include <sys/dmu_objset.h>
28 #include <sys/dsl_dataset.h>
29 #include <sys/dsl_dir.h>
30 #include <sys/dsl_prop.h>
31 #include <sys/dsl_synctask.h>
32 #include <sys/dmu_traverse.h>
33 #include <sys/dmu_impl.h>
34 #include <sys/dmu_tx.h>
38 #include <sys/zfeature.h>
39 #include <sys/unique.h>
40 #include <sys/zfs_context.h>
41 #include <sys/zfs_ioctl.h>
43 #include <sys/zfs_znode.h>
44 #include <sys/zfs_onexit.h>
46 #include <sys/dsl_scan.h>
47 #include <sys/dsl_deadlist.h>
48 #include <sys/dsl_destroy.h>
49 #include <sys/dsl_userhold.h>
/*
 * Exchange the values of two uint64_t lvalues.  Each argument is evaluated
 * twice, so callers must not pass expressions with side effects.
 */
#define	SWITCH64(x, y) \
	{ \
		uint64_t __tmp = (x); \
		(x) = (y); \
		(y) = __tmp; \
	}

#define	DS_REF_MAX	(1ULL << 62)

#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
63 * Figure out how much of this delta should be propogated to the dsl_dir
64 * layer. If there's a refreservation, that space has already been
65 * partially accounted for in our ancestors.
68 parent_delta(dsl_dataset_t
*ds
, int64_t delta
)
70 uint64_t old_bytes
, new_bytes
;
72 if (ds
->ds_reserved
== 0)
75 old_bytes
= MAX(ds
->ds_phys
->ds_unique_bytes
, ds
->ds_reserved
);
76 new_bytes
= MAX(ds
->ds_phys
->ds_unique_bytes
+ delta
, ds
->ds_reserved
);
78 ASSERT3U(ABS((int64_t)(new_bytes
- old_bytes
)), <=, ABS(delta
));
79 return (new_bytes
- old_bytes
);
83 dsl_dataset_block_born(dsl_dataset_t
*ds
, const blkptr_t
*bp
, dmu_tx_t
*tx
)
85 int used
, compressed
, uncompressed
;
88 used
= bp_get_dsize_sync(tx
->tx_pool
->dp_spa
, bp
);
89 compressed
= BP_GET_PSIZE(bp
);
90 uncompressed
= BP_GET_UCSIZE(bp
);
92 dprintf_bp(bp
, "ds=%p", ds
);
94 ASSERT(dmu_tx_is_syncing(tx
));
95 /* It could have been compressed away to nothing */
98 ASSERT(BP_GET_TYPE(bp
) != DMU_OT_NONE
);
99 ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp
)));
101 dsl_pool_mos_diduse_space(tx
->tx_pool
,
102 used
, compressed
, uncompressed
);
105 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
107 mutex_enter(&ds
->ds_dir
->dd_lock
);
108 mutex_enter(&ds
->ds_lock
);
109 delta
= parent_delta(ds
, used
);
110 ds
->ds_phys
->ds_referenced_bytes
+= used
;
111 ds
->ds_phys
->ds_compressed_bytes
+= compressed
;
112 ds
->ds_phys
->ds_uncompressed_bytes
+= uncompressed
;
113 ds
->ds_phys
->ds_unique_bytes
+= used
;
114 mutex_exit(&ds
->ds_lock
);
115 dsl_dir_diduse_space(ds
->ds_dir
, DD_USED_HEAD
, delta
,
116 compressed
, uncompressed
, tx
);
117 dsl_dir_transfer_space(ds
->ds_dir
, used
- delta
,
118 DD_USED_REFRSRV
, DD_USED_HEAD
, tx
);
119 mutex_exit(&ds
->ds_dir
->dd_lock
);
123 dsl_dataset_block_kill(dsl_dataset_t
*ds
, const blkptr_t
*bp
, dmu_tx_t
*tx
,
126 int used
, compressed
, uncompressed
;
131 ASSERT(dmu_tx_is_syncing(tx
));
132 ASSERT(bp
->blk_birth
<= tx
->tx_txg
);
134 used
= bp_get_dsize_sync(tx
->tx_pool
->dp_spa
, bp
);
135 compressed
= BP_GET_PSIZE(bp
);
136 uncompressed
= BP_GET_UCSIZE(bp
);
140 dsl_free(tx
->tx_pool
, tx
->tx_txg
, bp
);
141 dsl_pool_mos_diduse_space(tx
->tx_pool
,
142 -used
, -compressed
, -uncompressed
);
145 ASSERT3P(tx
->tx_pool
, ==, ds
->ds_dir
->dd_pool
);
147 ASSERT(!dsl_dataset_is_snapshot(ds
));
148 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
150 if (bp
->blk_birth
> ds
->ds_phys
->ds_prev_snap_txg
) {
153 dprintf_bp(bp
, "freeing ds=%llu", ds
->ds_object
);
154 dsl_free(tx
->tx_pool
, tx
->tx_txg
, bp
);
156 mutex_enter(&ds
->ds_dir
->dd_lock
);
157 mutex_enter(&ds
->ds_lock
);
158 ASSERT(ds
->ds_phys
->ds_unique_bytes
>= used
||
159 !DS_UNIQUE_IS_ACCURATE(ds
));
160 delta
= parent_delta(ds
, -used
);
161 ds
->ds_phys
->ds_unique_bytes
-= used
;
162 mutex_exit(&ds
->ds_lock
);
163 dsl_dir_diduse_space(ds
->ds_dir
, DD_USED_HEAD
,
164 delta
, -compressed
, -uncompressed
, tx
);
165 dsl_dir_transfer_space(ds
->ds_dir
, -used
- delta
,
166 DD_USED_REFRSRV
, DD_USED_HEAD
, tx
);
167 mutex_exit(&ds
->ds_dir
->dd_lock
);
169 dprintf_bp(bp
, "putting on dead list: %s", "");
172 * We are here as part of zio's write done callback,
173 * which means we're a zio interrupt thread. We can't
174 * call dsl_deadlist_insert() now because it may block
175 * waiting for I/O. Instead, put bp on the deferred
176 * queue and let dsl_pool_sync() finish the job.
178 bplist_append(&ds
->ds_pending_deadlist
, bp
);
180 dsl_deadlist_insert(&ds
->ds_deadlist
, bp
, tx
);
182 ASSERT3U(ds
->ds_prev
->ds_object
, ==,
183 ds
->ds_phys
->ds_prev_snap_obj
);
184 ASSERT(ds
->ds_prev
->ds_phys
->ds_num_children
> 0);
185 /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
186 if (ds
->ds_prev
->ds_phys
->ds_next_snap_obj
==
187 ds
->ds_object
&& bp
->blk_birth
>
188 ds
->ds_prev
->ds_phys
->ds_prev_snap_txg
) {
189 dmu_buf_will_dirty(ds
->ds_prev
->ds_dbuf
, tx
);
190 mutex_enter(&ds
->ds_prev
->ds_lock
);
191 ds
->ds_prev
->ds_phys
->ds_unique_bytes
+= used
;
192 mutex_exit(&ds
->ds_prev
->ds_lock
);
194 if (bp
->blk_birth
> ds
->ds_dir
->dd_origin_txg
) {
195 dsl_dir_transfer_space(ds
->ds_dir
, used
,
196 DD_USED_HEAD
, DD_USED_SNAP
, tx
);
199 mutex_enter(&ds
->ds_lock
);
200 ASSERT3U(ds
->ds_phys
->ds_referenced_bytes
, >=, used
);
201 ds
->ds_phys
->ds_referenced_bytes
-= used
;
202 ASSERT3U(ds
->ds_phys
->ds_compressed_bytes
, >=, compressed
);
203 ds
->ds_phys
->ds_compressed_bytes
-= compressed
;
204 ASSERT3U(ds
->ds_phys
->ds_uncompressed_bytes
, >=, uncompressed
);
205 ds
->ds_phys
->ds_uncompressed_bytes
-= uncompressed
;
206 mutex_exit(&ds
->ds_lock
);
212 dsl_dataset_prev_snap_txg(dsl_dataset_t
*ds
)
214 uint64_t trysnap
= 0;
219 * The snapshot creation could fail, but that would cause an
220 * incorrect FALSE return, which would only result in an
221 * overestimation of the amount of space that an operation would
222 * consume, which is OK.
224 * There's also a small window where we could miss a pending
225 * snapshot, because we could set the sync task in the quiescing
226 * phase. So this should only be used as a guess.
228 if (ds
->ds_trysnap_txg
>
229 spa_last_synced_txg(ds
->ds_dir
->dd_pool
->dp_spa
))
230 trysnap
= ds
->ds_trysnap_txg
;
231 return (MAX(ds
->ds_phys
->ds_prev_snap_txg
, trysnap
));
235 dsl_dataset_block_freeable(dsl_dataset_t
*ds
, const blkptr_t
*bp
,
238 if (blk_birth
<= dsl_dataset_prev_snap_txg(ds
))
241 ddt_prefetch(dsl_dataset_get_spa(ds
), bp
);
248 dsl_dataset_evict(dmu_buf_t
*db
, void *dsv
)
250 dsl_dataset_t
*ds
= dsv
;
252 ASSERT(ds
->ds_owner
== NULL
);
254 unique_remove(ds
->ds_fsid_guid
);
256 if (ds
->ds_objset
!= NULL
)
257 dmu_objset_evict(ds
->ds_objset
);
260 dsl_dataset_rele(ds
->ds_prev
, ds
);
264 bplist_destroy(&ds
->ds_pending_deadlist
);
265 if (ds
->ds_phys
->ds_deadlist_obj
!= 0)
266 dsl_deadlist_close(&ds
->ds_deadlist
);
268 dsl_dir_rele(ds
->ds_dir
, ds
);
270 ASSERT(!list_link_active(&ds
->ds_synced_link
));
272 mutex_destroy(&ds
->ds_lock
);
273 mutex_destroy(&ds
->ds_opening_lock
);
274 refcount_destroy(&ds
->ds_longholds
);
276 kmem_free(ds
, sizeof (dsl_dataset_t
));
280 dsl_dataset_get_snapname(dsl_dataset_t
*ds
)
282 dsl_dataset_phys_t
*headphys
;
285 dsl_pool_t
*dp
= ds
->ds_dir
->dd_pool
;
286 objset_t
*mos
= dp
->dp_meta_objset
;
288 if (ds
->ds_snapname
[0])
290 if (ds
->ds_phys
->ds_next_snap_obj
== 0)
293 err
= dmu_bonus_hold(mos
, ds
->ds_dir
->dd_phys
->dd_head_dataset_obj
,
297 headphys
= headdbuf
->db_data
;
298 err
= zap_value_search(dp
->dp_meta_objset
,
299 headphys
->ds_snapnames_zapobj
, ds
->ds_object
, 0, ds
->ds_snapname
);
300 dmu_buf_rele(headdbuf
, FTAG
);
305 dsl_dataset_snap_lookup(dsl_dataset_t
*ds
, const char *name
, uint64_t *value
)
307 objset_t
*mos
= ds
->ds_dir
->dd_pool
->dp_meta_objset
;
308 uint64_t snapobj
= ds
->ds_phys
->ds_snapnames_zapobj
;
312 if (ds
->ds_phys
->ds_flags
& DS_FLAG_CI_DATASET
)
317 err
= zap_lookup_norm(mos
, snapobj
, name
, 8, 1,
318 value
, mt
, NULL
, 0, NULL
);
319 if (err
== ENOTSUP
&& mt
== MT_FIRST
)
320 err
= zap_lookup(mos
, snapobj
, name
, 8, 1, value
);
325 dsl_dataset_snap_remove(dsl_dataset_t
*ds
, const char *name
, dmu_tx_t
*tx
)
327 objset_t
*mos
= ds
->ds_dir
->dd_pool
->dp_meta_objset
;
328 uint64_t snapobj
= ds
->ds_phys
->ds_snapnames_zapobj
;
332 dsl_dir_snap_cmtime_update(ds
->ds_dir
);
334 if (ds
->ds_phys
->ds_flags
& DS_FLAG_CI_DATASET
)
339 err
= zap_remove_norm(mos
, snapobj
, name
, mt
, tx
);
340 if (err
== ENOTSUP
&& mt
== MT_FIRST
)
341 err
= zap_remove(mos
, snapobj
, name
, tx
);
346 dsl_dataset_hold_obj(dsl_pool_t
*dp
, uint64_t dsobj
, void *tag
,
349 objset_t
*mos
= dp
->dp_meta_objset
;
353 dmu_object_info_t doi
;
355 ASSERT(dsl_pool_config_held(dp
));
357 err
= dmu_bonus_hold(mos
, dsobj
, tag
, &dbuf
);
361 /* Make sure dsobj has the correct object type. */
362 dmu_object_info_from_db(dbuf
, &doi
);
363 if (doi
.doi_type
!= DMU_OT_DSL_DATASET
) {
364 dmu_buf_rele(dbuf
, tag
);
365 return (SET_ERROR(EINVAL
));
368 ds
= dmu_buf_get_user(dbuf
);
370 dsl_dataset_t
*winner
= NULL
;
372 ds
= kmem_zalloc(sizeof (dsl_dataset_t
), KM_PUSHPAGE
);
374 ds
->ds_object
= dsobj
;
375 ds
->ds_phys
= dbuf
->db_data
;
376 list_link_init(&ds
->ds_synced_link
);
378 mutex_init(&ds
->ds_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
379 mutex_init(&ds
->ds_opening_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
380 mutex_init(&ds
->ds_sendstream_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
381 refcount_create(&ds
->ds_longholds
);
383 bplist_create(&ds
->ds_pending_deadlist
);
384 dsl_deadlist_open(&ds
->ds_deadlist
,
385 mos
, ds
->ds_phys
->ds_deadlist_obj
);
387 list_create(&ds
->ds_sendstreams
, sizeof (dmu_sendarg_t
),
388 offsetof(dmu_sendarg_t
, dsa_link
));
391 err
= dsl_dir_hold_obj(dp
,
392 ds
->ds_phys
->ds_dir_obj
, NULL
, ds
, &ds
->ds_dir
);
395 mutex_destroy(&ds
->ds_lock
);
396 mutex_destroy(&ds
->ds_opening_lock
);
397 refcount_destroy(&ds
->ds_longholds
);
398 bplist_destroy(&ds
->ds_pending_deadlist
);
399 dsl_deadlist_close(&ds
->ds_deadlist
);
400 kmem_free(ds
, sizeof (dsl_dataset_t
));
401 dmu_buf_rele(dbuf
, tag
);
405 if (!dsl_dataset_is_snapshot(ds
)) {
406 ds
->ds_snapname
[0] = '\0';
407 if (ds
->ds_phys
->ds_prev_snap_obj
!= 0) {
408 err
= dsl_dataset_hold_obj(dp
,
409 ds
->ds_phys
->ds_prev_snap_obj
,
413 if (zfs_flags
& ZFS_DEBUG_SNAPNAMES
)
414 err
= dsl_dataset_get_snapname(ds
);
415 if (err
== 0 && ds
->ds_phys
->ds_userrefs_obj
!= 0) {
417 ds
->ds_dir
->dd_pool
->dp_meta_objset
,
418 ds
->ds_phys
->ds_userrefs_obj
,
423 if (err
== 0 && !dsl_dataset_is_snapshot(ds
)) {
424 err
= dsl_prop_get_int_ds(ds
,
425 zfs_prop_to_name(ZFS_PROP_REFRESERVATION
),
428 err
= dsl_prop_get_int_ds(ds
,
429 zfs_prop_to_name(ZFS_PROP_REFQUOTA
),
433 ds
->ds_reserved
= ds
->ds_quota
= 0;
436 if (err
!= 0 || (winner
= dmu_buf_set_user_ie(dbuf
, ds
,
437 &ds
->ds_phys
, dsl_dataset_evict
)) != NULL
) {
438 bplist_destroy(&ds
->ds_pending_deadlist
);
439 dsl_deadlist_close(&ds
->ds_deadlist
);
441 dsl_dataset_rele(ds
->ds_prev
, ds
);
442 dsl_dir_rele(ds
->ds_dir
, ds
);
443 mutex_destroy(&ds
->ds_lock
);
444 mutex_destroy(&ds
->ds_opening_lock
);
445 refcount_destroy(&ds
->ds_longholds
);
446 kmem_free(ds
, sizeof (dsl_dataset_t
));
448 dmu_buf_rele(dbuf
, tag
);
454 unique_insert(ds
->ds_phys
->ds_fsid_guid
);
457 ASSERT3P(ds
->ds_dbuf
, ==, dbuf
);
458 ASSERT3P(ds
->ds_phys
, ==, dbuf
->db_data
);
459 ASSERT(ds
->ds_phys
->ds_prev_snap_obj
!= 0 ||
460 spa_version(dp
->dp_spa
) < SPA_VERSION_ORIGIN
||
461 dp
->dp_origin_snap
== NULL
|| ds
== dp
->dp_origin_snap
);
467 dsl_dataset_hold(dsl_pool_t
*dp
, const char *name
,
468 void *tag
, dsl_dataset_t
**dsp
)
471 const char *snapname
;
475 err
= dsl_dir_hold(dp
, name
, FTAG
, &dd
, &snapname
);
479 ASSERT(dsl_pool_config_held(dp
));
480 obj
= dd
->dd_phys
->dd_head_dataset_obj
;
482 err
= dsl_dataset_hold_obj(dp
, obj
, tag
, dsp
);
484 err
= SET_ERROR(ENOENT
);
486 /* we may be looking for a snapshot */
487 if (err
== 0 && snapname
!= NULL
) {
490 if (*snapname
++ != '@') {
491 dsl_dataset_rele(*dsp
, tag
);
492 dsl_dir_rele(dd
, FTAG
);
493 return (SET_ERROR(ENOENT
));
496 dprintf("looking for snapshot '%s'\n", snapname
);
497 err
= dsl_dataset_snap_lookup(*dsp
, snapname
, &obj
);
499 err
= dsl_dataset_hold_obj(dp
, obj
, tag
, &ds
);
500 dsl_dataset_rele(*dsp
, tag
);
503 mutex_enter(&ds
->ds_lock
);
504 if (ds
->ds_snapname
[0] == 0)
505 (void) strlcpy(ds
->ds_snapname
, snapname
,
506 sizeof (ds
->ds_snapname
));
507 mutex_exit(&ds
->ds_lock
);
512 dsl_dir_rele(dd
, FTAG
);
517 dsl_dataset_own_obj(dsl_pool_t
*dp
, uint64_t dsobj
,
518 void *tag
, dsl_dataset_t
**dsp
)
520 int err
= dsl_dataset_hold_obj(dp
, dsobj
, tag
, dsp
);
523 if (!dsl_dataset_tryown(*dsp
, tag
)) {
524 dsl_dataset_rele(*dsp
, tag
);
526 return (SET_ERROR(EBUSY
));
532 dsl_dataset_own(dsl_pool_t
*dp
, const char *name
,
533 void *tag
, dsl_dataset_t
**dsp
)
535 int err
= dsl_dataset_hold(dp
, name
, tag
, dsp
);
538 if (!dsl_dataset_tryown(*dsp
, tag
)) {
539 dsl_dataset_rele(*dsp
, tag
);
540 return (SET_ERROR(EBUSY
));
546 * See the comment above dsl_pool_hold() for details. In summary, a long
547 * hold is used to prevent destruction of a dataset while the pool hold
548 * is dropped, allowing other concurrent operations (e.g. spa_sync()).
550 * The dataset and pool must be held when this function is called. After it
551 * is called, the pool hold may be released while the dataset is still held
555 dsl_dataset_long_hold(dsl_dataset_t
*ds
, void *tag
)
557 ASSERT(dsl_pool_config_held(ds
->ds_dir
->dd_pool
));
558 (void) refcount_add(&ds
->ds_longholds
, tag
);
562 dsl_dataset_long_rele(dsl_dataset_t
*ds
, void *tag
)
564 (void) refcount_remove(&ds
->ds_longholds
, tag
);
567 /* Return B_TRUE if there are any long holds on this dataset. */
569 dsl_dataset_long_held(dsl_dataset_t
*ds
)
571 return (!refcount_is_zero(&ds
->ds_longholds
));
575 dsl_dataset_name(dsl_dataset_t
*ds
, char *name
)
578 (void) strcpy(name
, "mos");
580 dsl_dir_name(ds
->ds_dir
, name
);
581 VERIFY0(dsl_dataset_get_snapname(ds
));
582 if (ds
->ds_snapname
[0]) {
583 (void) strcat(name
, "@");
585 * We use a "recursive" mutex so that we
586 * can call dprintf_ds() with ds_lock held.
588 if (!MUTEX_HELD(&ds
->ds_lock
)) {
589 mutex_enter(&ds
->ds_lock
);
590 (void) strcat(name
, ds
->ds_snapname
);
591 mutex_exit(&ds
->ds_lock
);
593 (void) strcat(name
, ds
->ds_snapname
);
600 dsl_dataset_rele(dsl_dataset_t
*ds
, void *tag
)
602 dmu_buf_rele(ds
->ds_dbuf
, tag
);
606 dsl_dataset_disown(dsl_dataset_t
*ds
, void *tag
)
608 ASSERT(ds
->ds_owner
== tag
&& ds
->ds_dbuf
!= NULL
);
610 mutex_enter(&ds
->ds_lock
);
612 mutex_exit(&ds
->ds_lock
);
613 dsl_dataset_long_rele(ds
, tag
);
614 if (ds
->ds_dbuf
!= NULL
)
615 dsl_dataset_rele(ds
, tag
);
617 dsl_dataset_evict(NULL
, ds
);
621 dsl_dataset_tryown(dsl_dataset_t
*ds
, void *tag
)
623 boolean_t gotit
= FALSE
;
625 mutex_enter(&ds
->ds_lock
);
626 if (ds
->ds_owner
== NULL
&& !DS_IS_INCONSISTENT(ds
)) {
628 dsl_dataset_long_hold(ds
, tag
);
631 mutex_exit(&ds
->ds_lock
);
636 dsl_dataset_create_sync_dd(dsl_dir_t
*dd
, dsl_dataset_t
*origin
,
637 uint64_t flags
, dmu_tx_t
*tx
)
639 dsl_pool_t
*dp
= dd
->dd_pool
;
641 dsl_dataset_phys_t
*dsphys
;
643 objset_t
*mos
= dp
->dp_meta_objset
;
646 origin
= dp
->dp_origin_snap
;
648 ASSERT(origin
== NULL
|| origin
->ds_dir
->dd_pool
== dp
);
649 ASSERT(origin
== NULL
|| origin
->ds_phys
->ds_num_children
> 0);
650 ASSERT(dmu_tx_is_syncing(tx
));
651 ASSERT(dd
->dd_phys
->dd_head_dataset_obj
== 0);
653 dsobj
= dmu_object_alloc(mos
, DMU_OT_DSL_DATASET
, 0,
654 DMU_OT_DSL_DATASET
, sizeof (dsl_dataset_phys_t
), tx
);
655 VERIFY0(dmu_bonus_hold(mos
, dsobj
, FTAG
, &dbuf
));
656 dmu_buf_will_dirty(dbuf
, tx
);
657 dsphys
= dbuf
->db_data
;
658 bzero(dsphys
, sizeof (dsl_dataset_phys_t
));
659 dsphys
->ds_dir_obj
= dd
->dd_object
;
660 dsphys
->ds_flags
= flags
;
661 dsphys
->ds_fsid_guid
= unique_create();
662 (void) random_get_pseudo_bytes((void*)&dsphys
->ds_guid
,
663 sizeof (dsphys
->ds_guid
));
664 dsphys
->ds_snapnames_zapobj
=
665 zap_create_norm(mos
, U8_TEXTPREP_TOUPPER
, DMU_OT_DSL_DS_SNAP_MAP
,
667 dsphys
->ds_creation_time
= gethrestime_sec();
668 dsphys
->ds_creation_txg
= tx
->tx_txg
== TXG_INITIAL
? 1 : tx
->tx_txg
;
670 if (origin
== NULL
) {
671 dsphys
->ds_deadlist_obj
= dsl_deadlist_alloc(mos
, tx
);
673 dsl_dataset_t
*ohds
; /* head of the origin snapshot */
675 dsphys
->ds_prev_snap_obj
= origin
->ds_object
;
676 dsphys
->ds_prev_snap_txg
=
677 origin
->ds_phys
->ds_creation_txg
;
678 dsphys
->ds_referenced_bytes
=
679 origin
->ds_phys
->ds_referenced_bytes
;
680 dsphys
->ds_compressed_bytes
=
681 origin
->ds_phys
->ds_compressed_bytes
;
682 dsphys
->ds_uncompressed_bytes
=
683 origin
->ds_phys
->ds_uncompressed_bytes
;
684 dsphys
->ds_bp
= origin
->ds_phys
->ds_bp
;
685 dsphys
->ds_flags
|= origin
->ds_phys
->ds_flags
;
687 dmu_buf_will_dirty(origin
->ds_dbuf
, tx
);
688 origin
->ds_phys
->ds_num_children
++;
690 VERIFY0(dsl_dataset_hold_obj(dp
,
691 origin
->ds_dir
->dd_phys
->dd_head_dataset_obj
, FTAG
, &ohds
));
692 dsphys
->ds_deadlist_obj
= dsl_deadlist_clone(&ohds
->ds_deadlist
,
693 dsphys
->ds_prev_snap_txg
, dsphys
->ds_prev_snap_obj
, tx
);
694 dsl_dataset_rele(ohds
, FTAG
);
696 if (spa_version(dp
->dp_spa
) >= SPA_VERSION_NEXT_CLONES
) {
697 if (origin
->ds_phys
->ds_next_clones_obj
== 0) {
698 origin
->ds_phys
->ds_next_clones_obj
=
700 DMU_OT_NEXT_CLONES
, DMU_OT_NONE
, 0, tx
);
702 VERIFY0(zap_add_int(mos
,
703 origin
->ds_phys
->ds_next_clones_obj
, dsobj
, tx
));
706 dmu_buf_will_dirty(dd
->dd_dbuf
, tx
);
707 dd
->dd_phys
->dd_origin_obj
= origin
->ds_object
;
708 if (spa_version(dp
->dp_spa
) >= SPA_VERSION_DIR_CLONES
) {
709 if (origin
->ds_dir
->dd_phys
->dd_clones
== 0) {
710 dmu_buf_will_dirty(origin
->ds_dir
->dd_dbuf
, tx
);
711 origin
->ds_dir
->dd_phys
->dd_clones
=
713 DMU_OT_DSL_CLONES
, DMU_OT_NONE
, 0, tx
);
715 VERIFY0(zap_add_int(mos
,
716 origin
->ds_dir
->dd_phys
->dd_clones
, dsobj
, tx
));
720 if (spa_version(dp
->dp_spa
) >= SPA_VERSION_UNIQUE_ACCURATE
)
721 dsphys
->ds_flags
|= DS_FLAG_UNIQUE_ACCURATE
;
723 dmu_buf_rele(dbuf
, FTAG
);
725 dmu_buf_will_dirty(dd
->dd_dbuf
, tx
);
726 dd
->dd_phys
->dd_head_dataset_obj
= dsobj
;
732 dsl_dataset_zero_zil(dsl_dataset_t
*ds
, dmu_tx_t
*tx
)
736 VERIFY0(dmu_objset_from_ds(ds
, &os
));
737 bzero(&os
->os_zil_header
, sizeof (os
->os_zil_header
));
738 dsl_dataset_dirty(ds
, tx
);
742 dsl_dataset_create_sync(dsl_dir_t
*pdd
, const char *lastname
,
743 dsl_dataset_t
*origin
, uint64_t flags
, cred_t
*cr
, dmu_tx_t
*tx
)
745 dsl_pool_t
*dp
= pdd
->dd_pool
;
746 uint64_t dsobj
, ddobj
;
749 ASSERT(dmu_tx_is_syncing(tx
));
750 ASSERT(lastname
[0] != '@');
752 ddobj
= dsl_dir_create_sync(dp
, pdd
, lastname
, tx
);
753 VERIFY0(dsl_dir_hold_obj(dp
, ddobj
, lastname
, FTAG
, &dd
));
755 dsobj
= dsl_dataset_create_sync_dd(dd
, origin
,
756 flags
& ~DS_CREATE_FLAG_NODIRTY
, tx
);
758 dsl_deleg_set_create_perms(dd
, tx
, cr
);
760 dsl_dir_rele(dd
, FTAG
);
763 * If we are creating a clone, make sure we zero out any stale
764 * data from the origin snapshots zil header.
766 if (origin
!= NULL
&& !(flags
& DS_CREATE_FLAG_NODIRTY
)) {
769 VERIFY0(dsl_dataset_hold_obj(dp
, dsobj
, FTAG
, &ds
));
770 dsl_dataset_zero_zil(ds
, tx
);
771 dsl_dataset_rele(ds
, FTAG
);
778 * The unique space in the head dataset can be calculated by subtracting
779 * the space used in the most recent snapshot, that is still being used
780 * in this file system, from the space currently in use. To figure out
781 * the space in the most recent snapshot still in use, we need to take
782 * the total space used in the snapshot and subtract out the space that
783 * has been freed up since the snapshot was taken.
786 dsl_dataset_recalc_head_uniq(dsl_dataset_t
*ds
)
789 uint64_t dlused
, dlcomp
, dluncomp
;
791 ASSERT(!dsl_dataset_is_snapshot(ds
));
793 if (ds
->ds_phys
->ds_prev_snap_obj
!= 0)
794 mrs_used
= ds
->ds_prev
->ds_phys
->ds_referenced_bytes
;
798 dsl_deadlist_space(&ds
->ds_deadlist
, &dlused
, &dlcomp
, &dluncomp
);
800 ASSERT3U(dlused
, <=, mrs_used
);
801 ds
->ds_phys
->ds_unique_bytes
=
802 ds
->ds_phys
->ds_referenced_bytes
- (mrs_used
- dlused
);
804 if (spa_version(ds
->ds_dir
->dd_pool
->dp_spa
) >=
805 SPA_VERSION_UNIQUE_ACCURATE
)
806 ds
->ds_phys
->ds_flags
|= DS_FLAG_UNIQUE_ACCURATE
;
810 dsl_dataset_remove_from_next_clones(dsl_dataset_t
*ds
, uint64_t obj
,
813 objset_t
*mos
= ds
->ds_dir
->dd_pool
->dp_meta_objset
;
815 ASSERTV(uint64_t count
);
817 ASSERT(ds
->ds_phys
->ds_num_children
>= 2);
818 err
= zap_remove_int(mos
, ds
->ds_phys
->ds_next_clones_obj
, obj
, tx
);
820 * The err should not be ENOENT, but a bug in a previous version
821 * of the code could cause upgrade_clones_cb() to not set
822 * ds_next_snap_obj when it should, leading to a missing entry.
823 * If we knew that the pool was created after
824 * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
825 * ENOENT. However, at least we can check that we don't have
826 * too many entries in the next_clones_obj even after failing to
831 ASSERT0(zap_count(mos
, ds
->ds_phys
->ds_next_clones_obj
,
833 ASSERT3U(count
, <=, ds
->ds_phys
->ds_num_children
- 2);
838 dsl_dataset_get_blkptr(dsl_dataset_t
*ds
)
840 return (&ds
->ds_phys
->ds_bp
);
844 dsl_dataset_set_blkptr(dsl_dataset_t
*ds
, blkptr_t
*bp
, dmu_tx_t
*tx
)
846 ASSERT(dmu_tx_is_syncing(tx
));
847 /* If it's the meta-objset, set dp_meta_rootbp */
849 tx
->tx_pool
->dp_meta_rootbp
= *bp
;
851 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
852 ds
->ds_phys
->ds_bp
= *bp
;
857 dsl_dataset_get_spa(dsl_dataset_t
*ds
)
859 return (ds
->ds_dir
->dd_pool
->dp_spa
);
863 dsl_dataset_dirty(dsl_dataset_t
*ds
, dmu_tx_t
*tx
)
867 if (ds
== NULL
) /* this is the meta-objset */
870 ASSERT(ds
->ds_objset
!= NULL
);
872 if (ds
->ds_phys
->ds_next_snap_obj
!= 0)
873 panic("dirtying snapshot!");
875 dp
= ds
->ds_dir
->dd_pool
;
877 if (txg_list_add(&dp
->dp_dirty_datasets
, ds
, tx
->tx_txg
)) {
878 /* up the hold count until we can be written out */
879 dmu_buf_add_ref(ds
->ds_dbuf
, ds
);
884 dsl_dataset_is_dirty(dsl_dataset_t
*ds
)
888 for (t
= 0; t
< TXG_SIZE
; t
++) {
889 if (txg_list_member(&ds
->ds_dir
->dd_pool
->dp_dirty_datasets
,
897 dsl_dataset_snapshot_reserve_space(dsl_dataset_t
*ds
, dmu_tx_t
*tx
)
901 if (!dmu_tx_is_syncing(tx
))
905 * If there's an fs-only reservation, any blocks that might become
906 * owned by the snapshot dataset must be accommodated by space
907 * outside of the reservation.
909 ASSERT(ds
->ds_reserved
== 0 || DS_UNIQUE_IS_ACCURATE(ds
));
910 asize
= MIN(ds
->ds_phys
->ds_unique_bytes
, ds
->ds_reserved
);
911 if (asize
> dsl_dir_space_available(ds
->ds_dir
, NULL
, 0, TRUE
))
912 return (SET_ERROR(ENOSPC
));
915 * Propagate any reserved space for this snapshot to other
916 * snapshot checks in this sync group.
919 dsl_dir_willuse_space(ds
->ds_dir
, asize
, tx
);
924 typedef struct dsl_dataset_snapshot_arg
{
925 nvlist_t
*ddsa_snaps
;
926 nvlist_t
*ddsa_props
;
927 nvlist_t
*ddsa_errors
;
928 } dsl_dataset_snapshot_arg_t
;
931 dsl_dataset_snapshot_check_impl(dsl_dataset_t
*ds
, const char *snapname
,
932 dmu_tx_t
*tx
, boolean_t recv
)
937 ds
->ds_trysnap_txg
= tx
->tx_txg
;
939 if (!dmu_tx_is_syncing(tx
))
943 * We don't allow multiple snapshots of the same txg. If there
944 * is already one, try again.
946 if (ds
->ds_phys
->ds_prev_snap_txg
>= tx
->tx_txg
)
947 return (SET_ERROR(EAGAIN
));
950 * Check for conflicting snapshot name.
952 error
= dsl_dataset_snap_lookup(ds
, snapname
, &value
);
954 return (SET_ERROR(EEXIST
));
959 * We don't allow taking snapshots of inconsistent datasets, such as
960 * those into which we are currently receiving. However, if we are
961 * creating this snapshot as part of a receive, this check will be
962 * executed atomically with respect to the completion of the receive
963 * itself but prior to the clearing of DS_FLAG_INCONSISTENT; in this
964 * case we ignore this, knowing it will be fixed up for us shortly in
965 * dmu_recv_end_sync().
967 if (!recv
&& DS_IS_INCONSISTENT(ds
))
968 return (SET_ERROR(EBUSY
));
970 error
= dsl_dataset_snapshot_reserve_space(ds
, tx
);
978 dsl_dataset_snapshot_check(void *arg
, dmu_tx_t
*tx
)
980 dsl_dataset_snapshot_arg_t
*ddsa
= arg
;
981 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
985 for (pair
= nvlist_next_nvpair(ddsa
->ddsa_snaps
, NULL
);
986 pair
!= NULL
; pair
= nvlist_next_nvpair(ddsa
->ddsa_snaps
, pair
)) {
990 char dsname
[MAXNAMELEN
];
992 name
= nvpair_name(pair
);
993 if (strlen(name
) >= MAXNAMELEN
)
994 error
= SET_ERROR(ENAMETOOLONG
);
996 atp
= strchr(name
, '@');
998 error
= SET_ERROR(EINVAL
);
1000 (void) strlcpy(dsname
, name
, atp
- name
+ 1);
1003 error
= dsl_dataset_hold(dp
, dsname
, FTAG
, &ds
);
1005 error
= dsl_dataset_snapshot_check_impl(ds
,
1006 atp
+ 1, tx
, B_FALSE
);
1007 dsl_dataset_rele(ds
, FTAG
);
1011 if (ddsa
->ddsa_errors
!= NULL
) {
1012 fnvlist_add_int32(ddsa
->ddsa_errors
,
1022 dsl_dataset_snapshot_sync_impl(dsl_dataset_t
*ds
, const char *snapname
,
1025 dsl_pool_t
*dp
= ds
->ds_dir
->dd_pool
;
1027 dsl_dataset_phys_t
*dsphys
;
1028 uint64_t dsobj
, crtxg
;
1029 objset_t
*mos
= dp
->dp_meta_objset
;
1030 ASSERTV(static zil_header_t zero_zil
);
1031 ASSERTV(objset_t
*os
);
1033 ASSERT(RRW_WRITE_HELD(&dp
->dp_config_rwlock
));
1036 * If we are on an old pool, the zil must not be active, in which
1037 * case it will be zeroed. Usually zil_suspend() accomplishes this.
1039 ASSERT(spa_version(dmu_tx_pool(tx
)->dp_spa
) >= SPA_VERSION_FAST_SNAP
||
1040 dmu_objset_from_ds(ds
, &os
) != 0 ||
1041 bcmp(&os
->os_phys
->os_zil_header
, &zero_zil
,
1042 sizeof (zero_zil
)) == 0);
1046 * The origin's ds_creation_txg has to be < TXG_INITIAL
1048 if (strcmp(snapname
, ORIGIN_DIR_NAME
) == 0)
1053 dsobj
= dmu_object_alloc(mos
, DMU_OT_DSL_DATASET
, 0,
1054 DMU_OT_DSL_DATASET
, sizeof (dsl_dataset_phys_t
), tx
);
1055 VERIFY0(dmu_bonus_hold(mos
, dsobj
, FTAG
, &dbuf
));
1056 dmu_buf_will_dirty(dbuf
, tx
);
1057 dsphys
= dbuf
->db_data
;
1058 bzero(dsphys
, sizeof (dsl_dataset_phys_t
));
1059 dsphys
->ds_dir_obj
= ds
->ds_dir
->dd_object
;
1060 dsphys
->ds_fsid_guid
= unique_create();
1061 (void) random_get_pseudo_bytes((void*)&dsphys
->ds_guid
,
1062 sizeof (dsphys
->ds_guid
));
1063 dsphys
->ds_prev_snap_obj
= ds
->ds_phys
->ds_prev_snap_obj
;
1064 dsphys
->ds_prev_snap_txg
= ds
->ds_phys
->ds_prev_snap_txg
;
1065 dsphys
->ds_next_snap_obj
= ds
->ds_object
;
1066 dsphys
->ds_num_children
= 1;
1067 dsphys
->ds_creation_time
= gethrestime_sec();
1068 dsphys
->ds_creation_txg
= crtxg
;
1069 dsphys
->ds_deadlist_obj
= ds
->ds_phys
->ds_deadlist_obj
;
1070 dsphys
->ds_referenced_bytes
= ds
->ds_phys
->ds_referenced_bytes
;
1071 dsphys
->ds_compressed_bytes
= ds
->ds_phys
->ds_compressed_bytes
;
1072 dsphys
->ds_uncompressed_bytes
= ds
->ds_phys
->ds_uncompressed_bytes
;
1073 dsphys
->ds_flags
= ds
->ds_phys
->ds_flags
;
1074 dsphys
->ds_bp
= ds
->ds_phys
->ds_bp
;
1075 dmu_buf_rele(dbuf
, FTAG
);
1077 ASSERT3U(ds
->ds_prev
!= 0, ==, ds
->ds_phys
->ds_prev_snap_obj
!= 0);
1079 uint64_t next_clones_obj
=
1080 ds
->ds_prev
->ds_phys
->ds_next_clones_obj
;
1081 ASSERT(ds
->ds_prev
->ds_phys
->ds_next_snap_obj
==
1083 ds
->ds_prev
->ds_phys
->ds_num_children
> 1);
1084 if (ds
->ds_prev
->ds_phys
->ds_next_snap_obj
== ds
->ds_object
) {
1085 dmu_buf_will_dirty(ds
->ds_prev
->ds_dbuf
, tx
);
1086 ASSERT3U(ds
->ds_phys
->ds_prev_snap_txg
, ==,
1087 ds
->ds_prev
->ds_phys
->ds_creation_txg
);
1088 ds
->ds_prev
->ds_phys
->ds_next_snap_obj
= dsobj
;
1089 } else if (next_clones_obj
!= 0) {
1090 dsl_dataset_remove_from_next_clones(ds
->ds_prev
,
1091 dsphys
->ds_next_snap_obj
, tx
);
1092 VERIFY0(zap_add_int(mos
,
1093 next_clones_obj
, dsobj
, tx
));
1098 * If we have a reference-reservation on this dataset, we will
1099 * need to increase the amount of refreservation being charged
1100 * since our unique space is going to zero.
1102 if (ds
->ds_reserved
) {
1104 ASSERT(DS_UNIQUE_IS_ACCURATE(ds
));
1105 delta
= MIN(ds
->ds_phys
->ds_unique_bytes
, ds
->ds_reserved
);
1106 dsl_dir_diduse_space(ds
->ds_dir
, DD_USED_REFRSRV
,
1110 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
1111 ds
->ds_phys
->ds_deadlist_obj
= dsl_deadlist_clone(&ds
->ds_deadlist
,
1112 UINT64_MAX
, ds
->ds_phys
->ds_prev_snap_obj
, tx
);
1113 dsl_deadlist_close(&ds
->ds_deadlist
);
1114 dsl_deadlist_open(&ds
->ds_deadlist
, mos
, ds
->ds_phys
->ds_deadlist_obj
);
1115 dsl_deadlist_add_key(&ds
->ds_deadlist
,
1116 ds
->ds_phys
->ds_prev_snap_txg
, tx
);
1118 ASSERT3U(ds
->ds_phys
->ds_prev_snap_txg
, <, tx
->tx_txg
);
1119 ds
->ds_phys
->ds_prev_snap_obj
= dsobj
;
1120 ds
->ds_phys
->ds_prev_snap_txg
= crtxg
;
1121 ds
->ds_phys
->ds_unique_bytes
= 0;
1122 if (spa_version(dp
->dp_spa
) >= SPA_VERSION_UNIQUE_ACCURATE
)
1123 ds
->ds_phys
->ds_flags
|= DS_FLAG_UNIQUE_ACCURATE
;
1125 VERIFY0(zap_add(mos
, ds
->ds_phys
->ds_snapnames_zapobj
,
1126 snapname
, 8, 1, &dsobj
, tx
));
1129 dsl_dataset_rele(ds
->ds_prev
, ds
);
1130 VERIFY0(dsl_dataset_hold_obj(dp
,
1131 ds
->ds_phys
->ds_prev_snap_obj
, ds
, &ds
->ds_prev
));
1133 dsl_scan_ds_snapshotted(ds
, tx
);
1135 dsl_dir_snap_cmtime_update(ds
->ds_dir
);
1137 spa_history_log_internal_ds(ds
->ds_prev
, "snapshot", tx
, "");
1141 dsl_dataset_snapshot_sync(void *arg
, dmu_tx_t
*tx
)
1143 dsl_dataset_snapshot_arg_t
*ddsa
= arg
;
1144 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1147 for (pair
= nvlist_next_nvpair(ddsa
->ddsa_snaps
, NULL
);
1148 pair
!= NULL
; pair
= nvlist_next_nvpair(ddsa
->ddsa_snaps
, pair
)) {
1151 char dsname
[MAXNAMELEN
];
1153 name
= nvpair_name(pair
);
1154 atp
= strchr(name
, '@');
1155 (void) strlcpy(dsname
, name
, atp
- name
+ 1);
1156 VERIFY0(dsl_dataset_hold(dp
, dsname
, FTAG
, &ds
));
1158 dsl_dataset_snapshot_sync_impl(ds
, atp
+ 1, tx
);
1159 if (ddsa
->ddsa_props
!= NULL
) {
1160 dsl_props_set_sync_impl(ds
->ds_prev
,
1161 ZPROP_SRC_LOCAL
, ddsa
->ddsa_props
, tx
);
1163 dsl_dataset_rele(ds
, FTAG
);
1168 * The snapshots must all be in the same pool.
1169 * All-or-nothing: if there are any failures, nothing will be modified.
/*
 * Entry point for creating the snapshots named in "snaps".
 *
 * snaps:  nvlist whose pair names are "filesystem@snapshot" strings.
 * props:  stored in ddsa_props for the sync task.
 * errors: stored in ddsa_errors for the sync task.
 *
 * The pool is opened through the first pair's name only to compare
 * spa_version() with SPA_VERSION_FAST_SNAP; the result is kept in
 * needsuspend.  The "suspended" nvlist records, keyed by filesystem
 * name, the values later handed back to zil_resume().  The work itself is
 * done by dsl_sync_task() with dsl_dataset_snapshot_check and
 * dsl_dataset_snapshot_sync, sized at three per requested snapshot.
 * If "suspended" is non-NULL afterwards, every recorded entry is resumed
 * and the list is freed.
 */
1172 dsl_dataset_snapshot(nvlist_t
*snaps
, nvlist_t
*props
, nvlist_t
*errors
)
1174 dsl_dataset_snapshot_arg_t ddsa
;
1176 boolean_t needsuspend
;
1180 nvlist_t
*suspended
= NULL
;
1182 pair
= nvlist_next_nvpair(snaps
, NULL
);
1185 firstname
= nvpair_name(pair
);
1187 error
= spa_open(firstname
, &spa
, FTAG
);
1190 needsuspend
= (spa_version(spa
) < SPA_VERSION_FAST_SNAP
);
1191 spa_close(spa
, FTAG
);
1194 suspended
= fnvlist_alloc();
1195 for (pair
= nvlist_next_nvpair(snaps
, NULL
); pair
!= NULL
;
1196 pair
= nvlist_next_nvpair(snaps
, pair
)) {
1197 char fsname
[MAXNAMELEN
];
1198 char *snapname
= nvpair_name(pair
);
1202 atp
= strchr(snapname
, '@');
1204 error
= SET_ERROR(EINVAL
);
/* fsname receives the portion of snapname that precedes '@' */
1207 (void) strlcpy(fsname
, snapname
, atp
- snapname
+ 1);
1209 error
= zil_suspend(fsname
, &cookie
);
1212 fnvlist_add_uint64(suspended
, fsname
,
1217 ddsa
.ddsa_snaps
= snaps
;
1218 ddsa
.ddsa_props
= props
;
1219 ddsa
.ddsa_errors
= errors
;
1222 error
= dsl_sync_task(firstname
, dsl_dataset_snapshot_check
,
1223 dsl_dataset_snapshot_sync
, &ddsa
,
1224 fnvlist_num_pairs(snaps
) * 3);
1227 if (suspended
!= NULL
) {
1228 for (pair
= nvlist_next_nvpair(suspended
, NULL
); pair
!= NULL
;
1229 pair
= nvlist_next_nvpair(suspended
, pair
)) {
/* the stored uint64 is converted back to the pointer zil_resume() takes */
1230 zil_resume((void *)(uintptr_t)
1231 fnvpair_value_uint64(pair
));
1233 fnvlist_free(suspended
);
/*
 * Argument bundle that dsl_dataset_snapshot_tmp() fills in and passes to
 * dsl_dataset_snapshot_tmp_check() / dsl_dataset_snapshot_tmp_sync().
 * It carries the filesystem name, the snapshot name, the cleanup minor
 * and the hold tag given by the caller.
 */
1239 typedef struct dsl_dataset_snapshot_tmp_arg
{
1240 const char *ddsta_fsname
;
1241 const char *ddsta_snapname
;
1242 minor_t ddsta_cleanup_minor
;
1243 const char *ddsta_htag
;
1244 } dsl_dataset_snapshot_tmp_arg_t
;
/*
 * Check-phase callback used by dsl_dataset_snapshot_tmp().
 *
 * Holds the dataset named by ddsta_fsname, then:
 *   - runs dsl_dataset_snapshot_check_impl() for ddsta_snapname;
 *   - returns ENOTSUP when the pool's spa_version() is below
 *     SPA_VERSION_USERREFS;
 *   - runs dsl_dataset_user_hold_check_one() for ddsta_htag.
 * Each visible exit path releases the FTAG hold on ds before leaving.
 */
1247 dsl_dataset_snapshot_tmp_check(void *arg
, dmu_tx_t
*tx
)
1249 dsl_dataset_snapshot_tmp_arg_t
*ddsta
= arg
;
1250 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1254 error
= dsl_dataset_hold(dp
, ddsta
->ddsta_fsname
, FTAG
, &ds
);
1258 error
= dsl_dataset_snapshot_check_impl(ds
, ddsta
->ddsta_snapname
,
1261 dsl_dataset_rele(ds
, FTAG
);
1265 if (spa_version(dp
->dp_spa
) < SPA_VERSION_USERREFS
) {
1266 dsl_dataset_rele(ds
, FTAG
);
1267 return (SET_ERROR(ENOTSUP
));
1269 error
= dsl_dataset_user_hold_check_one(NULL
, ddsta
->ddsta_htag
,
1272 dsl_dataset_rele(ds
, FTAG
);
1276 dsl_dataset_rele(ds
, FTAG
);
/*
 * Sync-phase callback used by dsl_dataset_snapshot_tmp().
 *
 * Holds ddsta_fsname, creates the snapshot ddsta_snapname through
 * dsl_dataset_snapshot_sync_impl(), places a user hold tagged ddsta_htag
 * on ds->ds_prev (stamped with gethrestime_sec() and tied to
 * ddsta_cleanup_minor), and then calls dsl_destroy_snapshot_sync_impl()
 * on ds->ds_prev with B_TRUE.
 * NOTE(review): B_TRUE presumably requests a deferred destroy so the
 * snapshot goes away once the hold is dropped -- confirm in dsl_destroy.
 */
1281 dsl_dataset_snapshot_tmp_sync(void *arg
, dmu_tx_t
*tx
)
1283 dsl_dataset_snapshot_tmp_arg_t
*ddsta
= arg
;
1284 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1287 VERIFY0(dsl_dataset_hold(dp
, ddsta
->ddsta_fsname
, FTAG
, &ds
));
1289 dsl_dataset_snapshot_sync_impl(ds
, ddsta
->ddsta_snapname
, tx
);
1290 dsl_dataset_user_hold_sync_one(ds
->ds_prev
, ddsta
->ddsta_htag
,
1291 ddsta
->ddsta_cleanup_minor
, gethrestime_sec(), tx
);
1292 dsl_destroy_snapshot_sync_impl(ds
->ds_prev
, B_TRUE
, tx
);
1294 dsl_dataset_rele(ds
, FTAG
);
/*
 * Create the snapshot fsname@snapname together with a user hold.
 *
 * fsname:        filesystem to snapshot; also used to open the pool.
 * snapname:      name of the snapshot to create.
 * cleanup_minor: forwarded to the sync callback for the user hold.
 * htag:          tag for the user hold.
 *
 * The four arguments are packed into a dsl_dataset_snapshot_tmp_arg_t.
 * The pool is opened only to compare spa_version() with
 * SPA_VERSION_FAST_SNAP (needsuspend); zil_suspend() is called on fsname,
 * and the work is then run through dsl_sync_task() with the tmp
 * check/sync callbacks and a size argument of 3.
 */
1298 dsl_dataset_snapshot_tmp(const char *fsname
, const char *snapname
,
1299 minor_t cleanup_minor
, const char *htag
)
1301 dsl_dataset_snapshot_tmp_arg_t ddsta
;
1304 boolean_t needsuspend
;
1307 ddsta
.ddsta_fsname
= fsname
;
1308 ddsta
.ddsta_snapname
= snapname
;
1309 ddsta
.ddsta_cleanup_minor
= cleanup_minor
;
1310 ddsta
.ddsta_htag
= htag
;
1312 error
= spa_open(fsname
, &spa
, FTAG
);
1315 needsuspend
= (spa_version(spa
) < SPA_VERSION_FAST_SNAP
);
1316 spa_close(spa
, FTAG
);
1319 error
= zil_suspend(fsname
, &cookie
);
1324 error
= dsl_sync_task(fsname
, dsl_dataset_snapshot_tmp_check
,
1325 dsl_dataset_snapshot_tmp_sync
, &ddsta
, 3);
/*
 * Write out a dataset during syncing context.
 *
 * Asserts that tx is syncing, that the objset is instantiated and that
 * the dataset has no next snapshot (ds_next_snap_obj == 0).  It then
 * dirties ds_dbuf, copies the in-core ds_fsid_guid into ds_phys, and
 * hands the objset to dmu_objset_sync() with the supplied zio.
 */
1334 dsl_dataset_sync(dsl_dataset_t
*ds
, zio_t
*zio
, dmu_tx_t
*tx
)
1336 ASSERT(dmu_tx_is_syncing(tx
));
1337 ASSERT(ds
->ds_objset
!= NULL
);
1338 ASSERT(ds
->ds_phys
->ds_next_snap_obj
== 0);
1341 * in case we had to change ds_fsid_guid when we opened it,
1344 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
1345 ds
->ds_phys
->ds_fsid_guid
= ds
->ds_fsid_guid
;
1347 dmu_objset_sync(ds
->ds_objset
, zio
, tx
);
/*
 * Add the ZFS_PROP_CLONES property for ds to nv.
 *
 * Requires the pool config to be held (asserted).  When
 * ds_next_clones_obj is non-zero its entry count is fetched and compared
 * with ds_num_children - 1.  The ZAP is then iterated: every entry's
 * za_first_integer is held as a dataset, its dsl_dir name is added as a
 * boolean entry to "val", and the hold is dropped.  "val" is stored in
 * "propval" under ZPROP_VALUE and "propval" is stored in nv under the
 * clones property name.  propval is freed at the end.
 */
1351 get_clones_stat(dsl_dataset_t
*ds
, nvlist_t
*nv
)
1354 objset_t
*mos
= ds
->ds_dir
->dd_pool
->dp_meta_objset
;
1357 nvlist_t
*propval
= fnvlist_alloc();
1358 nvlist_t
*val
= fnvlist_alloc();
1360 ASSERT(dsl_pool_config_held(ds
->ds_dir
->dd_pool
));
1363 * There may be missing entries in ds_next_clones_obj
1364 * due to a bug in a previous version of the code.
1365 * Only trust it if it has the right number of entries.
1367 if (ds
->ds_phys
->ds_next_clones_obj
!= 0) {
1368 ASSERT0(zap_count(mos
, ds
->ds_phys
->ds_next_clones_obj
,
1371 if (count
!= ds
->ds_phys
->ds_num_children
- 1)
1373 for (zap_cursor_init(&zc
, mos
, ds
->ds_phys
->ds_next_clones_obj
);
1374 zap_cursor_retrieve(&zc
, &za
) == 0;
1375 zap_cursor_advance(&zc
)) {
1376 dsl_dataset_t
*clone
;
1377 char buf
[ZFS_MAXNAMELEN
];
1378 VERIFY0(dsl_dataset_hold_obj(ds
->ds_dir
->dd_pool
,
1379 za
.za_first_integer
, FTAG
, &clone
));
1380 dsl_dir_name(clone
->ds_dir
, buf
);
1381 fnvlist_add_boolean(val
, buf
);
1382 dsl_dataset_rele(clone
, FTAG
);
1384 zap_cursor_fini(&zc
);
1385 fnvlist_add_nvlist(propval
, ZPROP_VALUE
, val
);
1386 fnvlist_add_nvlist(nv
, zfs_prop_to_name(ZFS_PROP_CLONES
), propval
);
1389 nvlist_free(propval
);
/*
 * Fill nv with the statistics properties of ds.
 *
 * Requires the pool config to be held (asserted).  The visible code adds:
 * REFRATIO and LOGICALREFERENCED for every dataset; COMPRESSRATIO, USED
 * (ds_unique_bytes) and the clones list when ds is a snapshot;
 * dsl_dir_stats() output; AVAILABLE and REFERENCED from
 * dsl_dataset_space(); CREATION, CREATETXG, REFQUOTA, REFRESERVATION,
 * GUID, UNIQUE, OBJSETID, USERREFS and DEFER_DESTROY.  When the dataset
 * has a previous snapshot, that snapshot is held and
 * dsl_dataset_space_written() is used to produce WRITTEN.
 */
1393 dsl_dataset_stats(dsl_dataset_t
*ds
, nvlist_t
*nv
)
1395 uint64_t refd
, avail
, uobjs
, aobjs
, ratio
;
1396 ASSERTV(dsl_pool_t
*dp
= ds
->ds_dir
->dd_pool
);
1398 ASSERT(dsl_pool_config_held(dp
));
/* uncompressed * 100 / compressed; 100 when compressed is 0 (no divide) */
1400 ratio
= ds
->ds_phys
->ds_compressed_bytes
== 0 ? 100 :
1401 (ds
->ds_phys
->ds_uncompressed_bytes
* 100 /
1402 ds
->ds_phys
->ds_compressed_bytes
);
1404 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_REFRATIO
, ratio
);
1405 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_LOGICALREFERENCED
,
1406 ds
->ds_phys
->ds_uncompressed_bytes
);
1408 if (dsl_dataset_is_snapshot(ds
)) {
1409 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_COMPRESSRATIO
, ratio
);
1410 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_USED
,
1411 ds
->ds_phys
->ds_unique_bytes
);
1412 get_clones_stat(ds
, nv
);
1414 dsl_dir_stats(ds
->ds_dir
, nv
);
1417 dsl_dataset_space(ds
, &refd
, &avail
, &uobjs
, &aobjs
);
1418 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_AVAILABLE
, avail
);
1419 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_REFERENCED
, refd
);
1421 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_CREATION
,
1422 ds
->ds_phys
->ds_creation_time
);
1423 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_CREATETXG
,
1424 ds
->ds_phys
->ds_creation_txg
);
1425 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_REFQUOTA
,
1427 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_REFRESERVATION
,
1429 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_GUID
,
1430 ds
->ds_phys
->ds_guid
);
1431 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_UNIQUE
,
1432 ds
->ds_phys
->ds_unique_bytes
);
1433 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_OBJSETID
,
1435 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_USERREFS
,
1437 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_DEFER_DESTROY
,
1438 DS_IS_DEFER_DESTROY(ds
) ? 1 : 0);
1440 if (ds
->ds_phys
->ds_prev_snap_obj
!= 0) {
1441 uint64_t written
, comp
, uncomp
;
1442 dsl_pool_t
*dp
= ds
->ds_dir
->dd_pool
;
1443 dsl_dataset_t
*prev
;
1446 err
= dsl_dataset_hold_obj(dp
,
1447 ds
->ds_phys
->ds_prev_snap_obj
, FTAG
, &prev
);
1449 err
= dsl_dataset_space_written(prev
, ds
, &written
,
1451 dsl_dataset_rele(prev
, FTAG
);
1453 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_WRITTEN
,
/*
 * Fill *stat with the subset of dataset facts that can be read directly
 * from ds_phys.
 *
 * Requires the pool config to be held (asserted).  Sets creation txg,
 * the DS_FLAG_INCONSISTENT bit, the guid, and an empty origin string.
 * For a snapshot, dds_is_snapshot is B_TRUE and dds_num_clones is
 * ds_num_children - 1; the other visible assignments set B_FALSE and 0.
 * When the dataset's dsl_dir is a clone, the origin dataset is held, its
 * name written to dds_origin, and the hold released.
 */
1462 dsl_dataset_fast_stat(dsl_dataset_t
*ds
, dmu_objset_stats_t
*stat
)
1464 dsl_pool_t
*dp
= ds
->ds_dir
->dd_pool
;
1465 ASSERT(dsl_pool_config_held(dp
));
1467 stat
->dds_creation_txg
= ds
->ds_phys
->ds_creation_txg
;
1468 stat
->dds_inconsistent
= ds
->ds_phys
->ds_flags
& DS_FLAG_INCONSISTENT
;
1469 stat
->dds_guid
= ds
->ds_phys
->ds_guid
;
1470 stat
->dds_origin
[0] = '\0';
1471 if (dsl_dataset_is_snapshot(ds
)) {
1472 stat
->dds_is_snapshot
= B_TRUE
;
1473 stat
->dds_num_clones
= ds
->ds_phys
->ds_num_children
- 1;
1475 stat
->dds_is_snapshot
= B_FALSE
;
1476 stat
->dds_num_clones
= 0;
1478 if (dsl_dir_is_clone(ds
->ds_dir
)) {
1481 VERIFY0(dsl_dataset_hold_obj(dp
,
1482 ds
->ds_dir
->dd_phys
->dd_origin_obj
, FTAG
, &ods
));
1483 dsl_dataset_name(ods
, stat
->dds_origin
);
1484 dsl_dataset_rele(ods
, FTAG
);
/*
 * Accessor: returns the in-core ds_fsid_guid of the dataset.
 */
1490 dsl_dataset_fsid_guid(dsl_dataset_t
*ds
)
1492 return (ds
->ds_fsid_guid
);
/*
 * Report space and object usage for ds through four out-parameters.
 *
 * refdbytesp:  receives ds_referenced_bytes.
 * availbytesp: receives dsl_dir_space_available() for the dataset's dir,
 *              increased by the part of ds_reserved not yet covered by
 *              ds_unique_bytes, and, when ds_quota is set and the
 *              referenced bytes are below it, capped at the quota
 *              headroom (ds_quota - *refdbytesp).
 * usedobjsp:   receives the blk_fill of the dataset's root block pointer.
 * availobjsp:  receives DN_MAX_OBJECT - *usedobjsp.
 */
1496 dsl_dataset_space(dsl_dataset_t
*ds
,
1497 uint64_t *refdbytesp
, uint64_t *availbytesp
,
1498 uint64_t *usedobjsp
, uint64_t *availobjsp
)
1500 *refdbytesp
= ds
->ds_phys
->ds_referenced_bytes
;
1501 *availbytesp
= dsl_dir_space_available(ds
->ds_dir
, NULL
, 0, TRUE
);
1502 if (ds
->ds_reserved
> ds
->ds_phys
->ds_unique_bytes
)
1503 *availbytesp
+= ds
->ds_reserved
- ds
->ds_phys
->ds_unique_bytes
;
1504 if (ds
->ds_quota
!= 0) {
1506 * Adjust available bytes according to refquota
1508 if (*refdbytesp
< ds
->ds_quota
)
1509 *availbytesp
= MIN(*availbytesp
,
1510 ds
->ds_quota
- *refdbytesp
);
1514 *usedobjsp
= ds
->ds_phys
->ds_bp
.blk_fill
;
1515 *availobjsp
= DN_MAX_OBJECT
- *usedobjsp
;
/*
 * Decide whether ds differs from its most recent snapshot (ds_prev).
 *
 * Requires the pool config to be held (asserted).  ds_prev == NULL is
 * handled as a separate early case.  Otherwise, if the root block pointer
 * was born after ds_prev's creation txg, the objsets of ds and ds_prev
 * are obtained and their os_meta_dnode contents are compared with bcmp();
 * the function reports a modification only when they differ, so a change
 * confined to the ZIL does not count.
 */
1519 dsl_dataset_modified_since_lastsnap(dsl_dataset_t
*ds
)
1521 ASSERTV(dsl_pool_t
*dp
= ds
->ds_dir
->dd_pool
);
1523 ASSERT(dsl_pool_config_held(dp
));
1524 if (ds
->ds_prev
== NULL
)
1526 if (ds
->ds_phys
->ds_bp
.blk_birth
>
1527 ds
->ds_prev
->ds_phys
->ds_creation_txg
) {
1528 objset_t
*os
, *os_prev
;
1530 * It may be that only the ZIL differs, because it was
1531 * reset in the head. Don't count that as being
1534 if (dmu_objset_from_ds(ds
, &os
) != 0)
1536 if (dmu_objset_from_ds(ds
->ds_prev
, &os_prev
) != 0)
1538 return (bcmp(&os
->os_phys
->os_meta_dnode
,
1539 &os_prev
->os_phys
->os_meta_dnode
,
1540 sizeof (os
->os_phys
->os_meta_dnode
)) != 0);
/*
 * Argument bundle for the snapshot-rename sync task: the filesystem name,
 * the old and new snapshot names, and whether the rename is recursive.
 * dsl_dataset_rename_snapshot() fills these in; the sync callbacks also
 * use a ddrsa_tx member to carry the transaction to the per-dataset
 * worker.
 */
1545 typedef struct dsl_dataset_rename_snapshot_arg
{
1546 const char *ddrsa_fsname
;
1547 const char *ddrsa_oldsnapname
;
1548 const char *ddrsa_newsnapname
;
1549 boolean_t ddrsa_recursive
;
1551 } dsl_dataset_rename_snapshot_arg_t
;
/*
 * Per-dataset validation for a snapshot rename.  Also used as the
 * dmu_objset_find_dp() callback for recursive renames.
 *
 * hds: head dataset whose snapshot is to be renamed.
 * arg: dsl_dataset_rename_snapshot_arg_t.
 *
 * A missing old snapshot is not an error (ENOENT is turned into 0).
 * The new name must not already exist (EEXIST), and the dir name plus
 * "@" plus the new snapshot name must be shorter than MAXNAMELEN
 * (ENAMETOOLONG).
 */
1555 dsl_dataset_rename_snapshot_check_impl(dsl_pool_t
*dp
,
1556 dsl_dataset_t
*hds
, void *arg
)
1558 dsl_dataset_rename_snapshot_arg_t
*ddrsa
= arg
;
1562 error
= dsl_dataset_snap_lookup(hds
, ddrsa
->ddrsa_oldsnapname
, &val
);
1564 /* ignore nonexistent snapshots */
1565 return (error
== ENOENT
? 0 : error
);
1568 /* new name should not exist */
1569 error
= dsl_dataset_snap_lookup(hds
, ddrsa
->ddrsa_newsnapname
, &val
);
1571 error
= SET_ERROR(EEXIST
);
1572 else if (error
== ENOENT
)
1575 /* dataset name + 1 for the "@" + the new snapshot name must fit */
1576 if (dsl_dir_namelen(hds
->ds_dir
) + 1 +
1577 strlen(ddrsa
->ddrsa_newsnapname
) >= MAXNAMELEN
)
1578 error
= SET_ERROR(ENAMETOOLONG
);
/*
 * Check-phase callback for dsl_dataset_rename_snapshot().
 *
 * Holds the dataset named by ddrsa_fsname.  For a recursive rename the
 * per-dataset check is applied through dmu_objset_find_dp() starting at
 * the held dataset's dsl_dir object; otherwise it is applied to the held
 * dataset only.  The hold is released afterwards.
 */
1584 dsl_dataset_rename_snapshot_check(void *arg
, dmu_tx_t
*tx
)
1586 dsl_dataset_rename_snapshot_arg_t
*ddrsa
= arg
;
1587 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1591 error
= dsl_dataset_hold(dp
, ddrsa
->ddrsa_fsname
, FTAG
, &hds
);
1595 if (ddrsa
->ddrsa_recursive
) {
1596 error
= dmu_objset_find_dp(dp
, hds
->ds_dir
->dd_object
,
1597 dsl_dataset_rename_snapshot_check_impl
, ddrsa
,
1600 error
= dsl_dataset_rename_snapshot_check_impl(dp
, hds
, ddrsa
);
1602 dsl_dataset_rele(hds
, FTAG
);
/*
 * Per-dataset worker that performs a snapshot rename in syncing context.
 * The transaction is taken from ddrsa_tx.
 *
 * Looks up the old snapshot name under hds; ENOENT is tolerated, and any
 * other non-zero result trips the ASSERT.  The snapshot dataset is held
 * by object number, a "rename" history record is logged while the old
 * name is still in place, the old name is removed from hds, the new name
 * is copied into ds_snapname under ds_lock, and a new entry mapping that
 * name to ds_object is added to hds's snapshot-names ZAP.  The hold is
 * then released.
 */
1607 dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t
*dp
,
1608 dsl_dataset_t
*hds
, void *arg
)
1610 dsl_dataset_rename_snapshot_arg_t
*ddrsa
= arg
;
1613 dmu_tx_t
*tx
= ddrsa
->ddrsa_tx
;
1616 error
= dsl_dataset_snap_lookup(hds
, ddrsa
->ddrsa_oldsnapname
, &val
);
1617 ASSERT(error
== 0 || error
== ENOENT
);
1618 if (error
== ENOENT
) {
1619 /* ignore nonexistent snapshots */
1623 VERIFY0(dsl_dataset_hold_obj(dp
, val
, FTAG
, &ds
));
1625 /* log before we change the name */
1626 spa_history_log_internal_ds(ds
, "rename", tx
,
1627 "-> @%s", ddrsa
->ddrsa_newsnapname
);
1629 VERIFY0(dsl_dataset_snap_remove(hds
, ddrsa
->ddrsa_oldsnapname
, tx
));
1630 mutex_enter(&ds
->ds_lock
);
1631 (void) strcpy(ds
->ds_snapname
, ddrsa
->ddrsa_newsnapname
);
1632 mutex_exit(&ds
->ds_lock
);
1633 VERIFY0(zap_add(dp
->dp_meta_objset
, hds
->ds_phys
->ds_snapnames_zapobj
,
1634 ds
->ds_snapname
, 8, 1, &ds
->ds_object
, tx
));
1636 dsl_dataset_rele(ds
, FTAG
);
/*
 * Sync-phase callback for dsl_dataset_rename_snapshot().
 *
 * Holds the dataset named by ddrsa_fsname and stores tx in ddrsa_tx so
 * the per-dataset worker can reach it.  The worker is applied through
 * dmu_objset_find_dp() when ddrsa_recursive is set, otherwise directly
 * to the held dataset.  Any failure trips VERIFY0.  The hold is released
 * at the end.
 */
1641 dsl_dataset_rename_snapshot_sync(void *arg
, dmu_tx_t
*tx
)
1643 dsl_dataset_rename_snapshot_arg_t
*ddrsa
= arg
;
1644 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1647 VERIFY0(dsl_dataset_hold(dp
, ddrsa
->ddrsa_fsname
, FTAG
, &hds
));
1648 ddrsa
->ddrsa_tx
= tx
;
1649 if (ddrsa
->ddrsa_recursive
) {
1650 VERIFY0(dmu_objset_find_dp(dp
, hds
->ds_dir
->dd_object
,
1651 dsl_dataset_rename_snapshot_sync_impl
, ddrsa
,
1654 VERIFY0(dsl_dataset_rename_snapshot_sync_impl(dp
, hds
, ddrsa
));
1656 dsl_dataset_rele(hds
, FTAG
);
/*
 * Rename snapshot oldsnapname of fsname to newsnapname.
 *
 * fsname:      filesystem that owns the snapshot.
 * oldsnapname: current snapshot name (the part after '@').
 * newsnapname: desired snapshot name.
 * recursive:   stored in ddrsa_recursive; makes the callbacks walk the
 *              hierarchy via dmu_objset_find_dp().
 *
 * Returns the result of dsl_sync_task() run with the rename check/sync
 * callbacks.
 */
1660 dsl_dataset_rename_snapshot(const char *fsname
,
1661 const char *oldsnapname
, const char *newsnapname
, boolean_t recursive
)
1663 dsl_dataset_rename_snapshot_arg_t ddrsa
;
1665 ddrsa
.ddrsa_fsname
= fsname
;
1666 ddrsa
.ddrsa_oldsnapname
= oldsnapname
;
1667 ddrsa
.ddrsa_newsnapname
= newsnapname
;
1668 ddrsa
.ddrsa_recursive
= recursive
;
1670 return (dsl_sync_task(fsname
, dsl_dataset_rename_snapshot_check
,
1671 dsl_dataset_rename_snapshot_sync
, &ddrsa
, 1));
/*
 * Check-phase callback for dsl_dataset_rollback(); arg is the filesystem
 * name.
 *
 * Holds the dataset and rejects the rollback with:
 *   EINVAL  if it is a snapshot, or ds_prev_snap_txg < TXG_INITIAL
 *           (no most recent snapshot);
 *   EBUSY   if the dataset is long-held;
 *   EDQUOT  if ds_quota is set and the previous snapshot references
 *           more than ds_quota;
 *   ENOSPC  if MIN(ds_reserved, ds_unique_bytes) is positive and exceeds
 *           dsl_dir_space_available() for the dataset's dir.
 * The hold is released on every visible path.
 */
1675 dsl_dataset_rollback_check(void *arg
, dmu_tx_t
*tx
)
1677 const char *fsname
= arg
;
1678 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1680 int64_t unused_refres_delta
;
1683 error
= dsl_dataset_hold(dp
, fsname
, FTAG
, &ds
);
1687 /* must not be a snapshot */
1688 if (dsl_dataset_is_snapshot(ds
)) {
1689 dsl_dataset_rele(ds
, FTAG
);
1690 return (SET_ERROR(EINVAL
));
1693 /* must have a most recent snapshot */
1694 if (ds
->ds_phys
->ds_prev_snap_txg
< TXG_INITIAL
) {
1695 dsl_dataset_rele(ds
, FTAG
);
1696 return (SET_ERROR(EINVAL
));
1699 if (dsl_dataset_long_held(ds
)) {
1700 dsl_dataset_rele(ds
, FTAG
);
1701 return (SET_ERROR(EBUSY
));
1705 * Check if the snap we are rolling back to uses more than
1708 if (ds
->ds_quota
!= 0 &&
1709 ds
->ds_prev
->ds_phys
->ds_referenced_bytes
> ds
->ds_quota
) {
1710 dsl_dataset_rele(ds
, FTAG
);
1711 return (SET_ERROR(EDQUOT
));
1715 * When we do the clone swap, we will temporarily use more space
1716 * due to the refreservation (the head will no longer have any
1717 * unique space, so the entire amount of the refreservation will need
1718 * to be free). We will immediately destroy the clone, freeing
1719 * this space, but the freeing happens over many txg's.
1721 unused_refres_delta
= (int64_t)MIN(ds
->ds_reserved
,
1722 ds
->ds_phys
->ds_unique_bytes
);
1724 if (unused_refres_delta
> 0 &&
1725 unused_refres_delta
>
1726 dsl_dir_space_available(ds
->ds_dir
, NULL
, 0, TRUE
)) {
1727 dsl_dataset_rele(ds
, FTAG
);
1728 return (SET_ERROR(ENOSPC
));
1731 dsl_dataset_rele(ds
, FTAG
);
/*
 * Sync-phase callback for dsl_dataset_rollback(); arg is the filesystem
 * name.
 *
 * Creates a dataset named "%rollback" in the same dsl_dir with ds_prev as
 * its origin, swaps its contents with the head through
 * dsl_dataset_clone_swap_sync_impl(), zeroes the head's ZIL, and destroys
 * the clone with dsl_destroy_head_sync_impl().  Both holds are released
 * at the end.
 */
1736 dsl_dataset_rollback_sync(void *arg
, dmu_tx_t
*tx
)
1738 const char *fsname
= arg
;
1739 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1740 dsl_dataset_t
*ds
, *clone
;
1743 VERIFY0(dsl_dataset_hold(dp
, fsname
, FTAG
, &ds
));
1745 cloneobj
= dsl_dataset_create_sync(ds
->ds_dir
, "%rollback",
1746 ds
->ds_prev
, DS_CREATE_FLAG_NODIRTY
, kcred
, tx
);
1748 VERIFY0(dsl_dataset_hold_obj(dp
, cloneobj
, FTAG
, &clone
));
1750 dsl_dataset_clone_swap_sync_impl(clone
, ds
, tx
);
1751 dsl_dataset_zero_zil(ds
, tx
);
1753 dsl_destroy_head_sync_impl(clone
, tx
);
1755 dsl_dataset_rele(clone
, FTAG
);
1756 dsl_dataset_rele(ds
, FTAG
);
/*
 * Roll fsname back to its most recent snapshot.  Returns the result of
 * dsl_sync_task() run with the rollback check/sync callbacks; the name
 * itself is the callback argument, so the const is cast away to fit the
 * void * parameter.
 */
1760 dsl_dataset_rollback(const char *fsname
)
1762 return (dsl_sync_task(fsname
, dsl_dataset_rollback_check
,
1763 dsl_dataset_rollback_sync
, (void *)fsname
, 1));
/*
 * List node used by the snaplist_*() helpers and the promote code.
 * NOTE(review): the members are not visible here; users in this file
 * reference a "link" member (list linkage, via offsetof) and a "ds"
 * member (the held dataset) -- confirm against the full definition.
 */
1766 struct promotenode
{
/*
 * Working state for the promote sync task.
 *
 * ddpa_clonename is supplied by dsl_dataset_promote(); promote_hold()
 * fills in ddpa_clone, the three snapshot lists and origin_origin, and
 * promote_rele() tears them down again.  The space counters (used, comp,
 * uncomp, unique, cloneusedsnap, originusedsnap) are computed by
 * dsl_dataset_promote_check() and consumed by dsl_dataset_promote_sync().
 */
1771 typedef struct dsl_dataset_promote_arg
{
1772 const char *ddpa_clonename
;
1773 dsl_dataset_t
*ddpa_clone
;
1774 list_t shared_snaps
, origin_snaps
, clone_snaps
;
1775 dsl_dataset_t
*origin_origin
; /* origin of the origin */
1776 uint64_t used
, comp
, uncomp
, unique
, cloneusedsnap
, originusedsnap
;
1778 } dsl_dataset_promote_arg_t
;
/*
 * Forward declarations for helpers defined after the promote check/sync
 * callbacks that use them.
 */
1780 static int snaplist_space(list_t
*l
, uint64_t mintxg
, uint64_t *spacep
);
1781 static int promote_hold(dsl_dataset_promote_arg_t
*ddpa
, dsl_pool_t
*dp
,
1783 static void promote_rele(dsl_dataset_promote_arg_t
*ddpa
, void *tag
);
/*
 * Check-phase callback for dsl_dataset_promote().
 *
 * Takes all holds through promote_hold() and drops them through
 * promote_rele() before returning.  Fails with EXDEV when the clone has
 * DS_FLAG_NOPROMOTE set.  When tx is not syncing, the expensive space
 * computation is skipped and the holds are released right away.
 *
 * In syncing context it:
 *   - computes the origin's new unique space (ddpa->unique) from the
 *     deadlist of the oldest entry in clone_snaps;
 *   - starts used/comp/uncomp from the origin snapshot's byte counts and
 *     adds the deadlist space of every shared snapshot that has a
 *     previous snapshot, failing with EBUSY for a long-held snapshot and
 *     with EEXIST (name copied to err_ds) for a snapshot name that
 *     conflicts with one under the clone;
 *   - subtracts origin_origin's byte counts for a clone of a clone;
 *   - asks dsl_dir_transfer_possible() whether the space fits;
 *   - computes cloneusedsnap / originusedsnap with snaplist_space() when
 *     the respective dsl_dir has DD_FLAG_USED_BREAKDOWN.
 */
1786 dsl_dataset_promote_check(void *arg
, dmu_tx_t
*tx
)
1788 dsl_dataset_promote_arg_t
*ddpa
= arg
;
1789 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1791 struct promotenode
*snap
;
1792 dsl_dataset_t
*origin_ds
;
1796 err
= promote_hold(ddpa
, dp
, FTAG
);
1800 hds
= ddpa
->ddpa_clone
;
1802 if (hds
->ds_phys
->ds_flags
& DS_FLAG_NOPROMOTE
) {
1803 promote_rele(ddpa
, FTAG
);
1804 return (SET_ERROR(EXDEV
));
1808 * Compute and check the amount of space to transfer. Since this is
1809 * so expensive, don't do the preliminary check.
1811 if (!dmu_tx_is_syncing(tx
)) {
1812 promote_rele(ddpa
, FTAG
);
1816 snap
= list_head(&ddpa
->shared_snaps
);
1817 origin_ds
= snap
->ds
;
1819 /* compute origin's new unique space */
1820 snap
= list_tail(&ddpa
->clone_snaps
);
1821 ASSERT3U(snap
->ds
->ds_phys
->ds_prev_snap_obj
, ==, origin_ds
->ds_object
);
1822 dsl_deadlist_space_range(&snap
->ds
->ds_deadlist
,
1823 origin_ds
->ds_phys
->ds_prev_snap_txg
, UINT64_MAX
,
1824 &ddpa
->unique
, &unused
, &unused
);
1827 * Walk the snapshots that we are moving
1829 * Compute space to transfer. Consider the incremental changes
1830 * to used by each snapshot:
1831 * (my used) = (prev's used) + (blocks born) - (blocks killed)
1832 * So each snapshot gave birth to:
1833 * (blocks born) = (my used) - (prev's used) + (blocks killed)
1834 * So a sequence would look like:
1835 * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
1836 * Which simplifies to:
1837 * uN + kN + kN-1 + ... + k1 + k0
1838 * Note however, if we stop before we reach the ORIGIN we get:
1839 * uN + kN + kN-1 + ... + kM - uM-1
1841 ddpa
->used
= origin_ds
->ds_phys
->ds_referenced_bytes
;
1842 ddpa
->comp
= origin_ds
->ds_phys
->ds_compressed_bytes
;
1843 ddpa
->uncomp
= origin_ds
->ds_phys
->ds_uncompressed_bytes
;
1844 for (snap
= list_head(&ddpa
->shared_snaps
); snap
;
1845 snap
= list_next(&ddpa
->shared_snaps
, snap
)) {
1846 uint64_t val
, dlused
, dlcomp
, dluncomp
;
1847 dsl_dataset_t
*ds
= snap
->ds
;
1850 * If there are long holds, we won't be able to evict
1853 if (dsl_dataset_long_held(ds
)) {
1854 err
= SET_ERROR(EBUSY
);
1858 /* Check that the snapshot name does not conflict */
1859 VERIFY0(dsl_dataset_get_snapname(ds
));
1860 err
= dsl_dataset_snap_lookup(hds
, ds
->ds_snapname
, &val
);
1862 (void) strcpy(ddpa
->err_ds
, snap
->ds
->ds_snapname
);
1863 err
= SET_ERROR(EEXIST
);
1869 /* The very first snapshot does not have a deadlist */
1870 if (ds
->ds_phys
->ds_prev_snap_obj
== 0)
1873 dsl_deadlist_space(&ds
->ds_deadlist
,
1874 &dlused
, &dlcomp
, &dluncomp
);
1875 ddpa
->used
+= dlused
;
1876 ddpa
->comp
+= dlcomp
;
1877 ddpa
->uncomp
+= dluncomp
;
1881 * If we are a clone of a clone then we never reached ORIGIN,
1882 * so we need to subtract out the clone origin's used space.
1884 if (ddpa
->origin_origin
) {
1885 ddpa
->used
-= ddpa
->origin_origin
->ds_phys
->ds_referenced_bytes
;
1886 ddpa
->comp
-= ddpa
->origin_origin
->ds_phys
->ds_compressed_bytes
;
1888 ddpa
->origin_origin
->ds_phys
->ds_uncompressed_bytes
;
1891 /* Check that there is enough space here */
1892 err
= dsl_dir_transfer_possible(origin_ds
->ds_dir
, hds
->ds_dir
,
1898 * Compute the amounts of space that will be used by snapshots
1899 * after the promotion (for both origin and clone). For each,
1900 * it is the amount of space that will be on all of their
1901 * deadlists (that was not born before their new origin).
1903 if (hds
->ds_dir
->dd_phys
->dd_flags
& DD_FLAG_USED_BREAKDOWN
) {
1907 * Note, typically this will not be a clone of a clone,
1908 * so dd_origin_txg will be < TXG_INITIAL, so
1909 * these snaplist_space() -> dsl_deadlist_space_range()
1910 * calls will be fast because they do not have to
1911 * iterate over all bps.
1913 snap
= list_head(&ddpa
->origin_snaps
);
1914 err
= snaplist_space(&ddpa
->shared_snaps
,
1915 snap
->ds
->ds_dir
->dd_origin_txg
, &ddpa
->cloneusedsnap
);
1919 err
= snaplist_space(&ddpa
->clone_snaps
,
1920 snap
->ds
->ds_dir
->dd_origin_txg
, &space
);
1923 ddpa
->cloneusedsnap
+= space
;
1925 if (origin_ds
->ds_dir
->dd_phys
->dd_flags
& DD_FLAG_USED_BREAKDOWN
) {
1926 err
= snaplist_space(&ddpa
->origin_snaps
,
1927 origin_ds
->ds_phys
->ds_creation_txg
, &ddpa
->originusedsnap
);
1933 promote_rele(ddpa
, FTAG
);
/*
 * Sync-phase callback for dsl_dataset_promote().
 *
 * Names used below: hds is the clone being promoted, origin_ds is the
 * snapshot it was cloned from (head of shared_snaps), origin_head is the
 * head of origin_snaps, and odd is a separately held reference to
 * origin_ds's dsl_dir.  "dd" is used for the clone's own dsl_dir
 * (assignment not visible here -- presumably hds->ds_dir).
 *
 * Steps, in order:
 *   1. Point origin_ds's ds_next_snap_obj at the oldest entry of
 *      clone_snaps, and update origin_ds's next-clones ZAP if it has one.
 *   2. Swap the origin relationship: dd takes over odd's dd_origin_obj
 *      and origin_head's dd_origin_txg, while odd now originates from
 *      origin_ds.
 *   3. On pools at or above SPA_VERSION_DIR_CLONES, fix the dd_clones
 *      ZAPs accordingly, creating dd's dd_clones ZAP when it is 0.
 *   4. For every shared snapshot: evict its objset, move its name entry
 *      from origin_head to hds, repoint ds_dir_obj / ds_dir at dd, and
 *      move the dd_clones references of its clones from odd to dd
 *      (the entry equal to oldnext_obj is treated separately).
 *   5. Adjust DD_USED_SNAP / DD_USED_HEAD accounting on dd and odd from
 *      the values computed by the check callback.
 *   6. Store ddpa->unique as origin_ds's ds_unique_bytes, log a
 *      "promote" history record, and release odd and the promote holds.
 */
1938 dsl_dataset_promote_sync(void *arg
, dmu_tx_t
*tx
)
1940 dsl_dataset_promote_arg_t
*ddpa
= arg
;
1941 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
1943 struct promotenode
*snap
;
1944 dsl_dataset_t
*origin_ds
;
1945 dsl_dataset_t
*origin_head
;
1947 dsl_dir_t
*odd
= NULL
;
1948 uint64_t oldnext_obj
;
1951 VERIFY0(promote_hold(ddpa
, dp
, FTAG
));
1952 hds
= ddpa
->ddpa_clone
;
1954 ASSERT0(hds
->ds_phys
->ds_flags
& DS_FLAG_NOPROMOTE
);
1956 snap
= list_head(&ddpa
->shared_snaps
);
1957 origin_ds
= snap
->ds
;
1960 snap
= list_head(&ddpa
->origin_snaps
);
1961 origin_head
= snap
->ds
;
1964 * We need to explicitly open odd, since origin_ds's dd will be
1967 VERIFY0(dsl_dir_hold_obj(dp
, origin_ds
->ds_dir
->dd_object
,
1970 /* change origin's next snap */
1971 dmu_buf_will_dirty(origin_ds
->ds_dbuf
, tx
);
1972 oldnext_obj
= origin_ds
->ds_phys
->ds_next_snap_obj
;
1973 snap
= list_tail(&ddpa
->clone_snaps
);
1974 ASSERT3U(snap
->ds
->ds_phys
->ds_prev_snap_obj
, ==, origin_ds
->ds_object
);
1975 origin_ds
->ds_phys
->ds_next_snap_obj
= snap
->ds
->ds_object
;
1977 /* change the origin's next clone */
1978 if (origin_ds
->ds_phys
->ds_next_clones_obj
) {
1979 dsl_dataset_remove_from_next_clones(origin_ds
,
1980 snap
->ds
->ds_object
, tx
);
1981 VERIFY0(zap_add_int(dp
->dp_meta_objset
,
1982 origin_ds
->ds_phys
->ds_next_clones_obj
,
1987 dmu_buf_will_dirty(dd
->dd_dbuf
, tx
);
1988 ASSERT3U(dd
->dd_phys
->dd_origin_obj
, ==, origin_ds
->ds_object
);
1989 dd
->dd_phys
->dd_origin_obj
= odd
->dd_phys
->dd_origin_obj
;
1990 dd
->dd_origin_txg
= origin_head
->ds_dir
->dd_origin_txg
;
1991 dmu_buf_will_dirty(odd
->dd_dbuf
, tx
);
1992 odd
->dd_phys
->dd_origin_obj
= origin_ds
->ds_object
;
1993 origin_head
->ds_dir
->dd_origin_txg
=
1994 origin_ds
->ds_phys
->ds_creation_txg
;
1996 /* change dd_clone entries */
1997 if (spa_version(dp
->dp_spa
) >= SPA_VERSION_DIR_CLONES
) {
1998 VERIFY0(zap_remove_int(dp
->dp_meta_objset
,
1999 odd
->dd_phys
->dd_clones
, hds
->ds_object
, tx
));
2000 VERIFY0(zap_add_int(dp
->dp_meta_objset
,
2001 ddpa
->origin_origin
->ds_dir
->dd_phys
->dd_clones
,
2002 hds
->ds_object
, tx
));
2004 VERIFY0(zap_remove_int(dp
->dp_meta_objset
,
2005 ddpa
->origin_origin
->ds_dir
->dd_phys
->dd_clones
,
2006 origin_head
->ds_object
, tx
));
2007 if (dd
->dd_phys
->dd_clones
== 0) {
2008 dd
->dd_phys
->dd_clones
= zap_create(dp
->dp_meta_objset
,
2009 DMU_OT_DSL_CLONES
, DMU_OT_NONE
, 0, tx
);
2011 VERIFY0(zap_add_int(dp
->dp_meta_objset
,
2012 dd
->dd_phys
->dd_clones
, origin_head
->ds_object
, tx
));
2015 /* move snapshots to this dir */
2016 for (snap
= list_head(&ddpa
->shared_snaps
); snap
;
2017 snap
= list_next(&ddpa
->shared_snaps
, snap
)) {
2018 dsl_dataset_t
*ds
= snap
->ds
;
2021 * Property callbacks are registered to a particular
2022 * dsl_dir. Since ours is changing, evict the objset
2023 * so that they will be unregistered from the old dsl_dir.
2025 if (ds
->ds_objset
) {
2026 dmu_objset_evict(ds
->ds_objset
);
2027 ds
->ds_objset
= NULL
;
2030 /* move snap name entry */
2031 VERIFY0(dsl_dataset_get_snapname(ds
));
2032 VERIFY0(dsl_dataset_snap_remove(origin_head
,
2033 ds
->ds_snapname
, tx
));
2034 VERIFY0(zap_add(dp
->dp_meta_objset
,
2035 hds
->ds_phys
->ds_snapnames_zapobj
, ds
->ds_snapname
,
2036 8, 1, &ds
->ds_object
, tx
));
2038 /* change containing dsl_dir */
2039 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
2040 ASSERT3U(ds
->ds_phys
->ds_dir_obj
, ==, odd
->dd_object
);
2041 ds
->ds_phys
->ds_dir_obj
= dd
->dd_object
;
2042 ASSERT3P(ds
->ds_dir
, ==, odd
);
2043 dsl_dir_rele(ds
->ds_dir
, ds
);
2044 VERIFY0(dsl_dir_hold_obj(dp
, dd
->dd_object
,
2045 NULL
, ds
, &ds
->ds_dir
));
2047 /* move any clone references */
2048 if (ds
->ds_phys
->ds_next_clones_obj
&&
2049 spa_version(dp
->dp_spa
) >= SPA_VERSION_DIR_CLONES
) {
2053 for (zap_cursor_init(&zc
, dp
->dp_meta_objset
,
2054 ds
->ds_phys
->ds_next_clones_obj
);
2055 zap_cursor_retrieve(&zc
, &za
) == 0;
2056 zap_cursor_advance(&zc
)) {
2057 dsl_dataset_t
*cnds
;
2060 if (za
.za_first_integer
== oldnext_obj
) {
2062 * We've already moved the
2063 * origin's reference.
2068 VERIFY0(dsl_dataset_hold_obj(dp
,
2069 za
.za_first_integer
, FTAG
, &cnds
));
2070 o
= cnds
->ds_dir
->dd_phys
->dd_head_dataset_obj
;
2072 VERIFY0(zap_remove_int(dp
->dp_meta_objset
,
2073 odd
->dd_phys
->dd_clones
, o
, tx
));
2074 VERIFY0(zap_add_int(dp
->dp_meta_objset
,
2075 dd
->dd_phys
->dd_clones
, o
, tx
));
2076 dsl_dataset_rele(cnds
, FTAG
);
2078 zap_cursor_fini(&zc
);
2081 ASSERT(!dsl_prop_hascb(ds
));
2085 * Change space accounting.
2086 * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
2087 * both be valid, or both be 0 (resulting in delta == 0). This
2088 * is true for each of {clone,origin} independently.
2091 delta
= ddpa
->cloneusedsnap
-
2092 dd
->dd_phys
->dd_used_breakdown
[DD_USED_SNAP
];
2093 ASSERT3S(delta
, >=, 0);
2094 ASSERT3U(ddpa
->used
, >=, delta
);
2095 dsl_dir_diduse_space(dd
, DD_USED_SNAP
, delta
, 0, 0, tx
);
2096 dsl_dir_diduse_space(dd
, DD_USED_HEAD
,
2097 ddpa
->used
- delta
, ddpa
->comp
, ddpa
->uncomp
, tx
);
2099 delta
= ddpa
->originusedsnap
-
2100 odd
->dd_phys
->dd_used_breakdown
[DD_USED_SNAP
];
2101 ASSERT3S(delta
, <=, 0);
2102 ASSERT3U(ddpa
->used
, >=, -delta
);
2103 dsl_dir_diduse_space(odd
, DD_USED_SNAP
, delta
, 0, 0, tx
);
2104 dsl_dir_diduse_space(odd
, DD_USED_HEAD
,
2105 -ddpa
->used
- delta
, -ddpa
->comp
, -ddpa
->uncomp
, tx
);
2107 origin_ds
->ds_phys
->ds_unique_bytes
= ddpa
->unique
;
2109 /* log history record */
2110 spa_history_log_internal_ds(hds
, "promote", tx
, "");
2112 dsl_dir_rele(odd
, FTAG
);
2113 promote_rele(ddpa
, FTAG
);
2117 * Make a list of dsl_dataset_t's for the snapshots between first_obj
2118 * (exclusive) and last_obj (inclusive). The list will be in reverse
2119 * order (last_obj will be the list_head()). If first_obj == 0, do all
2120 * snapshots back to this dataset's origin.
/*
 * Build list "l" of promotenode entries by walking backwards from
 * last_obj through ds_prev_snap_obj until first_obj is reached.
 *
 * dp:        pool used to hold each dataset by object number.
 * first_obj: stop object (not added); may be replaced by the dir's
 *            dd_origin_obj during the walk.
 * last_obj:  first object visited, so it ends up at the list head.
 * l:         list to initialise and fill; nodes are linked through the
 *            promotenode "link" member.
 * tag:       hold tag for every dataset placed on the list.
 *
 * Each node is allocated with kmem_alloc(KM_SLEEP) and appended at the
 * tail; the matching teardown is snaplist_destroy().
 */
2123 snaplist_make(dsl_pool_t
*dp
,
2124 uint64_t first_obj
, uint64_t last_obj
, list_t
*l
, void *tag
)
2126 uint64_t obj
= last_obj
;
2128 list_create(l
, sizeof (struct promotenode
),
2129 offsetof(struct promotenode
, link
));
2131 while (obj
!= first_obj
) {
2133 struct promotenode
*snap
;
2136 err
= dsl_dataset_hold_obj(dp
, obj
, tag
, &ds
);
2137 ASSERT(err
!= ENOENT
);
2142 first_obj
= ds
->ds_dir
->dd_phys
->dd_origin_obj
;
2144 snap
= kmem_alloc(sizeof (*snap
), KM_SLEEP
);
2146 list_insert_tail(l
, snap
);
2147 obj
= ds
->ds_phys
->ds_prev_snap_obj
;
/*
 * Visit every promotenode on list "l" and query its dataset's deadlist
 * with dsl_deadlist_space_range() for blocks born between mintxg and
 * UINT64_MAX.
 * NOTE(review): the accumulation into *spacep is not visible here;
 * presumably the per-snapshot "used" values are summed -- confirm.
 */
2154 snaplist_space(list_t
*l
, uint64_t mintxg
, uint64_t *spacep
)
2156 struct promotenode
*snap
;
2159 for (snap
= list_head(l
); snap
; snap
= list_next(l
, snap
)) {
2160 uint64_t used
, comp
, uncomp
;
2161 dsl_deadlist_space_range(&snap
->ds
->ds_deadlist
,
2162 mintxg
, UINT64_MAX
, &used
, &comp
, &uncomp
);
/*
 * Tear down a list built by snaplist_make().
 *
 * A NULL list, or one whose list_head link is not active (never
 * created), is treated as a special case up front.  Otherwise nodes are
 * taken from the tail one at a time: the node is unlinked, its dataset
 * hold is released with "tag", and the node memory is freed.
 */
2169 snaplist_destroy(list_t
*l
, void *tag
)
2171 struct promotenode
*snap
;
2173 if (l
== NULL
|| !list_link_active(&l
->list_head
))
2176 while ((snap
= list_tail(l
)) != NULL
) {
2177 list_remove(l
, snap
);
2178 dsl_dataset_rele(snap
->ds
, tag
);
2179 kmem_free(snap
, sizeof (*snap
));
/*
 * Acquire every hold the promote check/sync callbacks need, all under
 * "tag"; promote_rele() is the inverse.
 *
 * Holds the clone named by ddpa_clonename and returns EINVAL (after
 * releasing it) if it is a snapshot or its dsl_dir is not a clone.
 * Then builds three lists with snaplist_make():
 *   shared_snaps: from the clone's origin snapshot backwards;
 *   clone_snaps:  from the clone head backwards;
 *   origin_snaps: from the origin's head dataset back to (excluding) the
 *                 origin snapshot.
 * If the origin's dsl_dir itself has an origin, that dataset is held in
 * origin_origin.  promote_rele() is called from within this function,
 * presumably on the error path -- confirm.
 */
2185 promote_hold(dsl_dataset_promote_arg_t
*ddpa
, dsl_pool_t
*dp
, void *tag
)
2189 struct promotenode
*snap
;
2191 error
= dsl_dataset_hold(dp
, ddpa
->ddpa_clonename
, tag
,
2195 dd
= ddpa
->ddpa_clone
->ds_dir
;
2197 if (dsl_dataset_is_snapshot(ddpa
->ddpa_clone
) ||
2198 !dsl_dir_is_clone(dd
)) {
2199 dsl_dataset_rele(ddpa
->ddpa_clone
, tag
);
2200 return (SET_ERROR(EINVAL
));
2203 error
= snaplist_make(dp
, 0, dd
->dd_phys
->dd_origin_obj
,
2204 &ddpa
->shared_snaps
, tag
);
2208 error
= snaplist_make(dp
, 0, ddpa
->ddpa_clone
->ds_object
,
2209 &ddpa
->clone_snaps
, tag
);
2213 snap
= list_head(&ddpa
->shared_snaps
);
2214 ASSERT3U(snap
->ds
->ds_object
, ==, dd
->dd_phys
->dd_origin_obj
);
2215 error
= snaplist_make(dp
, dd
->dd_phys
->dd_origin_obj
,
2216 snap
->ds
->ds_dir
->dd_phys
->dd_head_dataset_obj
,
2217 &ddpa
->origin_snaps
, tag
);
2221 if (snap
->ds
->ds_dir
->dd_phys
->dd_origin_obj
!= 0) {
2222 error
= dsl_dataset_hold_obj(dp
,
2223 snap
->ds
->ds_dir
->dd_phys
->dd_origin_obj
,
2224 tag
, &ddpa
->origin_origin
);
2230 promote_rele(ddpa
, tag
);
/*
 * Release everything promote_hold() acquired under "tag": the three
 * snapshot lists, origin_origin when it is non-NULL, and finally the
 * clone itself.
 */
2235 promote_rele(dsl_dataset_promote_arg_t
*ddpa
, void *tag
)
2237 snaplist_destroy(&ddpa
->shared_snaps
, tag
);
2238 snaplist_destroy(&ddpa
->clone_snaps
, tag
);
2239 snaplist_destroy(&ddpa
->origin_snaps
, tag
);
2240 if (ddpa
->origin_origin
!= NULL
)
2241 dsl_dataset_rele(ddpa
->origin_origin
, tag
);
2242 dsl_dataset_rele(ddpa
->ddpa_clone
, tag
);
2248 * If it fails due to a conflicting snapshot name, "conflsnap" will be filled
2249 * in with the name. (It must be at least MAXNAMELEN bytes long.)
/*
 * Promote the clone called "name".
 *
 * name:      dataset to promote; stored in ddpa_clonename.
 * conflsnap: caller buffer stored in err_ds; the check callback copies a
 *            conflicting snapshot name into it.
 *
 * The number of entries in the dataset's snapshot-names ZAP is counted
 * first (under a temporary objset hold) so the sync task can be sized as
 * 2 + numsnaps.  Returns the result of dsl_sync_task() run with the
 * promote check/sync callbacks.
 */
2252 dsl_dataset_promote(const char *name
, char *conflsnap
)
2254 dsl_dataset_promote_arg_t ddpa
= { 0 };
2260 * We will modify space proportional to the number of
2261 * snapshots. Compute numsnaps.
2263 error
= dmu_objset_hold(name
, FTAG
, &os
);
2266 error
= zap_count(dmu_objset_pool(os
)->dp_meta_objset
,
2267 dmu_objset_ds(os
)->ds_phys
->ds_snapnames_zapobj
, &numsnaps
);
2268 dmu_objset_rele(os
, FTAG
);
2272 ddpa
.ddpa_clonename
= name
;
2273 ddpa
.err_ds
= conflsnap
;
2275 return (dsl_sync_task(name
, dsl_dataset_promote_check
,
2276 dsl_dataset_promote_sync
, &ddpa
, 2 + numsnaps
));
/*
 * Validate that "clone" and "origin_head" may have their contents
 * swapped.
 *
 * Visible error returns:
 *   EINVAL  either dataset is a snapshot; their ds_prev differ; the
 *           shared ds_prev is neither NULL nor the pool's dp_origin_snap
 *           and its ds_next_snap_obj is not origin_head; or the clone's
 *           dsl_dir is not a child of origin_head's dsl_dir.
 *   ETXTBSY origin_head was modified since its last snapshot and
 *           "force" is not set.
 *   EBUSY   origin_head is long-held.
 *   ENOSPC  the swap would increase the consumed part of origin_head's
 *           refreservation by more than dsl_dir_space_available().
 *   EDQUOT  origin_head has a ds_quota and the clone references more
 *           than that.
 */
2280 dsl_dataset_clone_swap_check_impl(dsl_dataset_t
*clone
,
2281 dsl_dataset_t
*origin_head
, boolean_t force
)
2283 int64_t unused_refres_delta
;
2285 /* they should both be heads */
2286 if (dsl_dataset_is_snapshot(clone
) ||
2287 dsl_dataset_is_snapshot(origin_head
))
2288 return (SET_ERROR(EINVAL
));
2290 /* the branch point should be just before them */
2291 if (clone
->ds_prev
!= origin_head
->ds_prev
)
2292 return (SET_ERROR(EINVAL
));
2294 /* clone should be the clone (unless they are unrelated) */
2295 if (clone
->ds_prev
!= NULL
&&
2296 clone
->ds_prev
!= clone
->ds_dir
->dd_pool
->dp_origin_snap
&&
2297 origin_head
->ds_object
!=
2298 clone
->ds_prev
->ds_phys
->ds_next_snap_obj
)
2299 return (SET_ERROR(EINVAL
));
2301 /* the clone should be a child of the origin */
2302 if (clone
->ds_dir
->dd_parent
!= origin_head
->ds_dir
)
2303 return (SET_ERROR(EINVAL
));
2305 /* origin_head shouldn't be modified unless 'force' */
2306 if (!force
&& dsl_dataset_modified_since_lastsnap(origin_head
))
2307 return (SET_ERROR(ETXTBSY
));
2309 /* origin_head should have no long holds (e.g. is not mounted) */
2310 if (dsl_dataset_long_held(origin_head
))
2311 return (SET_ERROR(EBUSY
));
2313 /* check amount of any unconsumed refreservation */
2314 unused_refres_delta
=
2315 (int64_t)MIN(origin_head
->ds_reserved
,
2316 origin_head
->ds_phys
->ds_unique_bytes
) -
2317 (int64_t)MIN(origin_head
->ds_reserved
,
2318 clone
->ds_phys
->ds_unique_bytes
);
2320 if (unused_refres_delta
> 0 &&
2321 unused_refres_delta
>
2322 dsl_dir_space_available(origin_head
->ds_dir
, NULL
, 0, TRUE
))
2323 return (SET_ERROR(ENOSPC
));
2325 /* clone can't be over the head's refquota */
2326 if (origin_head
->ds_quota
!= 0 &&
2327 clone
->ds_phys
->ds_referenced_bytes
> origin_head
->ds_quota
)
2328 return (SET_ERROR(EDQUOT
));
/*
 * Exchange the state of 'clone' and 'origin_head' in transaction 'tx'.
 *
 * As written below, this swaps the two datasets' ds_bp, their
 * ds_referenced/compressed/uncompressed/unique byte counts and their
 * ds_deadlist_obj, and adjusts the dsl_dir space accounting
 * (DD_USED_HEAD, DD_USED_SNAP, DD_USED_REFRSRV) to match.
 *
 *   clone       - one side of the swap; asserted to have ds_reserved == 0
 *   origin_head - the other side of the swap; its ds_quota (when non-zero)
 *                 is asserted to be at least clone's ds_unique_bytes
 *   tx          - transaction used to dirty buffers and charge space
 *
 * NOTE(review): lines appear to be missing from this listing (for example
 * no declaration of 'tmp' is visible); confirm against the pristine file.
 */
2334 dsl_dataset_clone_swap_sync_impl(dsl_dataset_t
*clone
,
2335 dsl_dataset_t
*origin_head
, dmu_tx_t
*tx
)
2337 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
2338 int64_t unused_refres_delta
;
/* Preconditions on the reservation and quota of the two datasets. */
2340 ASSERT(clone
->ds_reserved
== 0);
2341 ASSERT(origin_head
->ds_quota
== 0 ||
2342 clone
->ds_phys
->ds_unique_bytes
<= origin_head
->ds_quota
);
2344 dmu_buf_will_dirty(clone
->ds_dbuf
, tx
);
2345 dmu_buf_will_dirty(origin_head
->ds_dbuf
, tx
);
/* Evict each dataset's objset, if one is attached, and clear the pointer. */
2347 if (clone
->ds_objset
!= NULL
) {
2348 dmu_objset_evict(clone
->ds_objset
);
2349 clone
->ds_objset
= NULL
;
2352 if (origin_head
->ds_objset
!= NULL
) {
2353 dmu_objset_evict(origin_head
->ds_objset
);
2354 origin_head
->ds_objset
= NULL
;
/*
 * MIN(reserved, unique) for origin_head as it is now, minus the same
 * quantity computed with clone's unique bytes.  Must be captured here,
 * before the ds_unique_bytes values are swapped further down.
 */
2357 unused_refres_delta
=
2358 (int64_t)MIN(origin_head
->ds_reserved
,
2359 origin_head
->ds_phys
->ds_unique_bytes
) -
2360 (int64_t)MIN(origin_head
->ds_reserved
,
2361 clone
->ds_phys
->ds_unique_bytes
);
2364 * Reset origin's unique bytes, if it exists.
2366 if (clone
->ds_prev
) {
2367 dsl_dataset_t
*origin
= clone
->ds_prev
;
2368 uint64_t comp
, uncomp
;
2370 dmu_buf_will_dirty(origin
->ds_dbuf
, tx
);
2371 dsl_deadlist_space_range(&clone
->ds_deadlist
,
2372 origin
->ds_phys
->ds_prev_snap_txg
, UINT64_MAX
,
2373 &origin
->ds_phys
->ds_unique_bytes
, &comp
, &uncomp
);
/* Exchange the two ds_bp values through tmp. */
2379 tmp
= origin_head
->ds_phys
->ds_bp
;
2380 origin_head
->ds_phys
->ds_bp
= clone
->ds_phys
->ds_bp
;
2381 clone
->ds_phys
->ds_bp
= tmp
;
2384 /* set dd_*_bytes */
2386 int64_t dused
, dcomp
, duncomp
;
2387 uint64_t cdl_used
, cdl_comp
, cdl_uncomp
;
2388 uint64_t odl_used
, odl_comp
, odl_uncomp
;
2390 ASSERT3U(clone
->ds_dir
->dd_phys
->
2391 dd_used_breakdown
[DD_USED_SNAP
], ==, 0);
2393 dsl_deadlist_space(&clone
->ds_deadlist
,
2394 &cdl_used
, &cdl_comp
, &cdl_uncomp
);
2395 dsl_deadlist_space(&origin_head
->ds_deadlist
,
2396 &odl_used
, &odl_comp
, &odl_uncomp
);
/*
 * Each delta is clone's byte count plus its deadlist space, minus the
 * same sum for origin_head.  origin_head's dir is charged the delta and
 * clone's dir is charged its negation.
 */
2398 dused
= clone
->ds_phys
->ds_referenced_bytes
+ cdl_used
-
2399 (origin_head
->ds_phys
->ds_referenced_bytes
+ odl_used
);
2400 dcomp
= clone
->ds_phys
->ds_compressed_bytes
+ cdl_comp
-
2401 (origin_head
->ds_phys
->ds_compressed_bytes
+ odl_comp
);
2402 duncomp
= clone
->ds_phys
->ds_uncompressed_bytes
+
2404 (origin_head
->ds_phys
->ds_uncompressed_bytes
+ odl_uncomp
);
2406 dsl_dir_diduse_space(origin_head
->ds_dir
, DD_USED_HEAD
,
2407 dused
, dcomp
, duncomp
, tx
);
2408 dsl_dir_diduse_space(clone
->ds_dir
, DD_USED_HEAD
,
2409 -dused
, -dcomp
, -duncomp
, tx
);
2412 * The difference in the space used by snapshots is the
2413 * difference in snapshot space due to the head's
2414 * deadlist (since that's the only thing that's
2415 * changing that affects the snapused).
2417 dsl_deadlist_space_range(&clone
->ds_deadlist
,
2418 origin_head
->ds_dir
->dd_origin_txg
, UINT64_MAX
,
2419 &cdl_used
, &cdl_comp
, &cdl_uncomp
);
2420 dsl_deadlist_space_range(&origin_head
->ds_deadlist
,
2421 origin_head
->ds_dir
->dd_origin_txg
, UINT64_MAX
,
2422 &odl_used
, &odl_comp
, &odl_uncomp
);
2423 dsl_dir_transfer_space(origin_head
->ds_dir
, cdl_used
- odl_used
,
2424 DD_USED_HEAD
, DD_USED_SNAP
, tx
);
2427 /* swap ds_*_bytes */
2428 SWITCH64(origin_head
->ds_phys
->ds_referenced_bytes
,
2429 clone
->ds_phys
->ds_referenced_bytes
);
2430 SWITCH64(origin_head
->ds_phys
->ds_compressed_bytes
,
2431 clone
->ds_phys
->ds_compressed_bytes
);
2432 SWITCH64(origin_head
->ds_phys
->ds_uncompressed_bytes
,
2433 clone
->ds_phys
->ds_uncompressed_bytes
);
2434 SWITCH64(origin_head
->ds_phys
->ds_unique_bytes
,
2435 clone
->ds_phys
->ds_unique_bytes
);
2437 /* apply any parent delta for change in unconsumed refreservation */
2438 dsl_dir_diduse_space(origin_head
->ds_dir
, DD_USED_REFRSRV
,
2439 unused_refres_delta
, 0, 0, tx
);
/*
 * Close both deadlists, swap the deadlist object numbers, then reopen
 * each deadlist against the object number it now holds.
 */
2444 dsl_deadlist_close(&clone
->ds_deadlist
);
2445 dsl_deadlist_close(&origin_head
->ds_deadlist
);
2446 SWITCH64(origin_head
->ds_phys
->ds_deadlist_obj
,
2447 clone
->ds_phys
->ds_deadlist_obj
);
2448 dsl_deadlist_open(&clone
->ds_deadlist
, dp
->dp_meta_objset
,
2449 clone
->ds_phys
->ds_deadlist_obj
);
2450 dsl_deadlist_open(&origin_head
->ds_deadlist
, dp
->dp_meta_objset
,
2451 origin_head
->ds_phys
->ds_deadlist_obj
);
2453 dsl_scan_ds_clone_swapped(origin_head
, clone
, tx
);
2455 spa_history_log_internal_ds(clone
, "clone swap", tx
,
2456 "parent=%s", origin_head
->ds_dir
->dd_myname
);
2460 * Given a pool name and a dataset object number in that pool,
2461 * return the name of that dataset.
/*
 * Look up dataset object 'obj' in the pool named 'pname' and write the
 * dataset's name into 'buf'.
 *
 *   pname - pool name passed to dsl_pool_hold()
 *   obj   - dataset object number passed to dsl_dataset_hold_obj()
 *   buf   - output buffer filled by dsl_dataset_name(); no length is passed,
 *           so the caller must size it (NOTE(review): required size is not
 *           visible here -- confirm against dsl_dataset_name())
 *
 * Both the pool hold and the dataset hold are taken and released with FTAG.
 * NOTE(review): the error checks and return statements are not visible in
 * this listing.
 */
2464 dsl_dsobj_to_dsname(char *pname
, uint64_t obj
, char *buf
)
2470 error
= dsl_pool_hold(pname
, FTAG
, &dp
);
2474 error
= dsl_dataset_hold_obj(dp
, obj
, FTAG
, &ds
);
2476 dsl_dataset_name(ds
, buf
);
2477 dsl_dataset_rele(ds
, FTAG
);
2479 dsl_pool_rele(dp
, FTAG
);
/*
 * Adjust the caller's space estimate for this dataset's refreservation and,
 * when requested, compare the dataset's referenced bytes against ds_quota.
 *
 *   ds          - dataset to check; ds_lock is held around the checks
 *   check_quota - when false the quota comparison is skipped
 *   asize       - size of the request; asserted to be > 0
 *   inflight    - added to ds_referenced_bytes for the quota comparison
 *                 (presumably space charged but not yet synced -- confirm
 *                 against callers)
 *   used        - in/out: reduced by (ds_reserved - ds_unique_bytes) when
 *                 the reservation exceeds the unique bytes
 *   ref_rsrv    - out: per the comment below, the portion of asize that
 *                 comes from unconsumed refreservation
 *
 * error is set to ERESTART or EDQUOT once ds_referenced_bytes + inflight
 * has reached ds_quota.  NOTE(review): the full condition selecting between
 * the two, and the return statements, are not visible in this listing.
 */
2485 dsl_dataset_check_quota(dsl_dataset_t
*ds
, boolean_t check_quota
,
2486 uint64_t asize
, uint64_t inflight
, uint64_t *used
, uint64_t *ref_rsrv
)
2490 ASSERT3S(asize
, >, 0);
2493 * *ref_rsrv is the portion of asize that will come from any
2494 * unconsumed refreservation space.
2498 mutex_enter(&ds
->ds_lock
);
2500 * Make a space adjustment for reserved bytes.
2502 if (ds
->ds_reserved
> ds
->ds_phys
->ds_unique_bytes
) {
2504 ds
->ds_reserved
- ds
->ds_phys
->ds_unique_bytes
);
2505 *used
-= (ds
->ds_reserved
- ds
->ds_phys
->ds_unique_bytes
);
2507 asize
- MIN(asize
, parent_delta(ds
, asize
+ inflight
));
/* Nothing more to check when the caller opted out or no quota is set. */
2510 if (!check_quota
|| ds
->ds_quota
== 0) {
2511 mutex_exit(&ds
->ds_lock
);
2515 * If they are requesting more space, and our current estimate
2516 * is over quota, they get to try again unless the actual
2517 * on-disk is over quota and there are no pending changes (which
2518 * may free up space for us).
2520 if (ds
->ds_phys
->ds_referenced_bytes
+ inflight
>= ds
->ds_quota
) {
2522 ds
->ds_phys
->ds_referenced_bytes
< ds
->ds_quota
)
2523 error
= SET_ERROR(ERESTART
);
2525 error
= SET_ERROR(EDQUOT
);
2527 mutex_exit(&ds
->ds_lock
);
/*
 * Argument bundle handed to the refquota and refreservation check/sync
 * callbacks in this file via dsl_sync_task().
 */
2532 typedef struct dsl_dataset_set_qr_arg
{
/* Name of the dataset, as passed to dsl_dataset_hold(). */
2533 const char *ddsqra_name
;
/* Property source forwarded to dsl_prop_predict() and the set routines. */
2534 zprop_source_t ddsqra_source
;
/* Requested property value. */
2535 uint64_t ddsqra_value
;
2536 } dsl_dataset_set_qr_arg_t
;
/*
 * Validation step for setting the refquota property; passed to
 * dsl_sync_task() by dsl_dataset_set_refquota().
 *
 *   arg - dsl_dataset_set_qr_arg_t naming the dataset, source and value
 *   tx  - transaction; used here only to find the pool
 *
 * Fails with ENOTSUP when the pool's SPA version is below
 * SPA_VERSION_REFQUOTA, EINVAL when the dataset is a snapshot, and ENOSPC
 * when the value predicted by dsl_prop_predict() is below the dataset's
 * ds_referenced_bytes or ds_reserved.  Every exit visible after the hold
 * releases it with dsl_dataset_rele().
 *
 * NOTE(review): the conditions guarding the two releases that follow
 * dsl_prop_predict() are not visible in this listing.
 */
2541 dsl_dataset_set_refquota_check(void *arg
, dmu_tx_t
*tx
)
2543 dsl_dataset_set_qr_arg_t
*ddsqra
= arg
;
2544 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
2549 if (spa_version(dp
->dp_spa
) < SPA_VERSION_REFQUOTA
)
2550 return (SET_ERROR(ENOTSUP
));
2552 error
= dsl_dataset_hold(dp
, ddsqra
->ddsqra_name
, FTAG
, &ds
);
2556 if (dsl_dataset_is_snapshot(ds
)) {
2557 dsl_dataset_rele(ds
, FTAG
);
2558 return (SET_ERROR(EINVAL
));
/* Compute the value the property would take if this set were applied. */
2561 error
= dsl_prop_predict(ds
->ds_dir
,
2562 zfs_prop_to_name(ZFS_PROP_REFQUOTA
),
2563 ddsqra
->ddsqra_source
, ddsqra
->ddsqra_value
, &newval
);
2565 dsl_dataset_rele(ds
, FTAG
);
2570 dsl_dataset_rele(ds
, FTAG
);
2574 if (newval
< ds
->ds_phys
->ds_referenced_bytes
||
2575 newval
< ds
->ds_reserved
) {
2576 dsl_dataset_rele(ds
, FTAG
);
2577 return (SET_ERROR(ENOSPC
));
2580 dsl_dataset_rele(ds
, FTAG
);
/*
 * Apply step for setting the refquota property; passed to dsl_sync_task()
 * by dsl_dataset_set_refquota().
 *
 *   arg - dsl_dataset_set_qr_arg_t naming the dataset, source and value
 *   tx  - transaction under which the property and dataset are updated
 *
 * Writes the property, reads back its resulting value, and stores that
 * value in ds->ds_quota when it differs from the current one.  The hold and
 * the read-back are wrapped in VERIFY0().
 */
2585 dsl_dataset_set_refquota_sync(void *arg
, dmu_tx_t
*tx
)
2587 dsl_dataset_set_qr_arg_t
*ddsqra
= arg
;
2588 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
2592 VERIFY0(dsl_dataset_hold(dp
, ddsqra
->ddsqra_name
, FTAG
, &ds
));
2594 dsl_prop_set_sync_impl(ds
,
2595 zfs_prop_to_name(ZFS_PROP_REFQUOTA
),
2596 ddsqra
->ddsqra_source
, sizeof (ddsqra
->ddsqra_value
), 1,
2597 &ddsqra
->ddsqra_value
, tx
);
/* Re-read the property rather than trusting the requested value. */
2599 VERIFY0(dsl_prop_get_int_ds(ds
,
2600 zfs_prop_to_name(ZFS_PROP_REFQUOTA
), &newval
));
/* Only dirty the dataset buffer when the quota actually changes. */
2602 if (ds
->ds_quota
!= newval
) {
2603 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
2604 ds
->ds_quota
= newval
;
2606 dsl_dataset_rele(ds
, FTAG
);
/*
 * Set the refquota property on the dataset named 'dsname'.
 *
 *   dsname - dataset name; also passed to dsl_sync_task() as its first
 *            argument
 *   source - property source recorded with the new value
 *
 * Packs the arguments into a dsl_dataset_set_qr_arg_t and returns the result
 * of dsl_sync_task() run with dsl_dataset_set_refquota_check() and
 * dsl_dataset_set_refquota_sync().
 *
 * NOTE(review): 'refquota' is stored into ddsqra_value below but its
 * parameter declaration is not visible in this listing.
 */
2610 dsl_dataset_set_refquota(const char *dsname
, zprop_source_t source
,
2613 dsl_dataset_set_qr_arg_t ddsqra
;
2615 ddsqra
.ddsqra_name
= dsname
;
2616 ddsqra
.ddsqra_source
= source
;
2617 ddsqra
.ddsqra_value
= refquota
;
2619 return (dsl_sync_task(dsname
, dsl_dataset_set_refquota_check
,
2620 dsl_dataset_set_refquota_sync
, &ddsqra
, 0));
/*
 * Validation step for setting the refreservation property; passed to
 * dsl_sync_task() by dsl_dataset_set_refreservation().
 *
 *   arg - dsl_dataset_set_qr_arg_t naming the dataset, source and value
 *   tx  - transaction; used to find the pool and to tell whether the check
 *         is running in syncing context
 *
 * Fails with ENOTSUP when the pool's SPA version is below
 * SPA_VERSION_REFRESERVATION and EINVAL when the dataset is a snapshot.
 * When the tx is not syncing, the hold is released before the space checks
 * (see the comment below about inaccurate estimates).  Otherwise, if raising
 * the reservation would increase MAX(unique, reservation), ENOSPC is
 * returned when the test involving dsl_dir_space_available() holds or when
 * newval exceeds a non-zero ds_quota.
 *
 * NOTE(review): part of the ENOSPC condition and several return statements
 * are not visible in this listing.
 */
2624 dsl_dataset_set_refreservation_check(void *arg
, dmu_tx_t
*tx
)
2626 dsl_dataset_set_qr_arg_t
*ddsqra
= arg
;
2627 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
2630 uint64_t newval
, unique
;
2632 if (spa_version(dp
->dp_spa
) < SPA_VERSION_REFRESERVATION
)
2633 return (SET_ERROR(ENOTSUP
));
2635 error
= dsl_dataset_hold(dp
, ddsqra
->ddsqra_name
, FTAG
, &ds
);
2639 if (dsl_dataset_is_snapshot(ds
)) {
2640 dsl_dataset_rele(ds
, FTAG
);
2641 return (SET_ERROR(EINVAL
));
/* Compute the value the property would take if this set were applied. */
2644 error
= dsl_prop_predict(ds
->ds_dir
,
2645 zfs_prop_to_name(ZFS_PROP_REFRESERVATION
),
2646 ddsqra
->ddsqra_source
, ddsqra
->ddsqra_value
, &newval
);
2648 dsl_dataset_rele(ds
, FTAG
);
2653 * If we are doing the preliminary check in open context, the
2654 * space estimates may be inaccurate.
2656 if (!dmu_tx_is_syncing(tx
)) {
2657 dsl_dataset_rele(ds
, FTAG
);
/* Read ds_unique_bytes under ds_lock, recalculating it first if stale. */
2661 mutex_enter(&ds
->ds_lock
);
2662 if (!DS_UNIQUE_IS_ACCURATE(ds
))
2663 dsl_dataset_recalc_head_uniq(ds
);
2664 unique
= ds
->ds_phys
->ds_unique_bytes
;
2665 mutex_exit(&ds
->ds_lock
);
/* delta is the growth in MAX(unique, reservation) the new value causes. */
2667 if (MAX(unique
, newval
) > MAX(unique
, ds
->ds_reserved
)) {
2668 uint64_t delta
= MAX(unique
, newval
) -
2669 MAX(unique
, ds
->ds_reserved
);
2672 dsl_dir_space_available(ds
->ds_dir
, NULL
, 0, B_TRUE
) ||
2673 (ds
->ds_quota
> 0 && newval
> ds
->ds_quota
)) {
2674 dsl_dataset_rele(ds
, FTAG
);
2675 return (SET_ERROR(ENOSPC
));
2679 dsl_dataset_rele(ds
, FTAG
);
/*
 * Write the refreservation property for 'ds' and update the in-core
 * reservation and the dsl_dir's DD_USED_REFRSRV accounting to match.
 *
 *   ds     - dataset whose reservation is being changed (already held by
 *            the caller; no hold is taken here)
 *   source - property source recorded with the new value
 *   value  - requested refreservation
 *   tx     - transaction under which the change is made
 *
 * The reservation actually stored is the value read back from the property
 * after it is set.  delta is the change in the unconsumed part of the
 * reservation: MAX(0, newval - unique) minus MAX(0, ds_reserved - unique).
 * dd_lock is taken before ds_lock and is held across the
 * dsl_dir_diduse_space() call; ds_lock is dropped before that call.
 */
2684 dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t
*ds
,
2685 zprop_source_t source
, uint64_t value
, dmu_tx_t
*tx
)
2691 dsl_prop_set_sync_impl(ds
, zfs_prop_to_name(ZFS_PROP_REFRESERVATION
),
2692 source
, sizeof (value
), 1, &value
, tx
);
2694 VERIFY0(dsl_prop_get_int_ds(ds
,
2695 zfs_prop_to_name(ZFS_PROP_REFRESERVATION
), &newval
));
2697 dmu_buf_will_dirty(ds
->ds_dbuf
, tx
);
2698 mutex_enter(&ds
->ds_dir
->dd_lock
);
2699 mutex_enter(&ds
->ds_lock
);
2700 ASSERT(DS_UNIQUE_IS_ACCURATE(ds
));
2701 unique
= ds
->ds_phys
->ds_unique_bytes
;
/* The int64_t casts let a reservation below 'unique' clamp to zero. */
2702 delta
= MAX(0, (int64_t)(newval
- unique
)) -
2703 MAX(0, (int64_t)(ds
->ds_reserved
- unique
));
2704 ds
->ds_reserved
= newval
;
2705 mutex_exit(&ds
->ds_lock
);
2707 dsl_dir_diduse_space(ds
->ds_dir
, DD_USED_REFRSRV
, delta
, 0, 0, tx
);
2708 mutex_exit(&ds
->ds_dir
->dd_lock
);
/*
 * Apply step for setting the refreservation property; passed to
 * dsl_sync_task() by dsl_dataset_set_refreservation().
 *
 *   arg - dsl_dataset_set_qr_arg_t naming the dataset, source and value
 *   tx  - transaction under which the change is made
 *
 * Holds the named dataset (VERIFY0 on failure), delegates to
 * dsl_dataset_set_refreservation_sync_impl(), then releases the hold.
 */
2712 dsl_dataset_set_refreservation_sync(void *arg
, dmu_tx_t
*tx
)
2714 dsl_dataset_set_qr_arg_t
*ddsqra
= arg
;
2715 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
2718 VERIFY0(dsl_dataset_hold(dp
, ddsqra
->ddsqra_name
, FTAG
, &ds
));
2719 dsl_dataset_set_refreservation_sync_impl(ds
,
2720 ddsqra
->ddsqra_source
, ddsqra
->ddsqra_value
, tx
);
2721 dsl_dataset_rele(ds
, FTAG
);
/*
 * Set the refreservation property on the dataset named 'dsname'.
 *
 *   dsname         - dataset name; also passed to dsl_sync_task() as its
 *                    first argument
 *   source         - property source recorded with the new value
 *   refreservation - requested value
 *
 * Packs the arguments into a dsl_dataset_set_qr_arg_t and returns the result
 * of dsl_sync_task() run with dsl_dataset_set_refreservation_check() and
 * dsl_dataset_set_refreservation_sync().
 */
2725 dsl_dataset_set_refreservation(const char *dsname
, zprop_source_t source
,
2726 uint64_t refreservation
)
2728 dsl_dataset_set_qr_arg_t ddsqra
;
2730 ddsqra
.ddsqra_name
= dsname
;
2731 ddsqra
.ddsqra_source
= source
;
2732 ddsqra
.ddsqra_value
= refreservation
;
2734 return (dsl_sync_task(dsname
, dsl_dataset_set_refreservation_check
,
2735 dsl_dataset_set_refreservation_sync
, &ddsqra
, 0));
2739 * Return (in *usedp) the amount of space written in new that is not
2740 * present in oldsnap. New may be a snapshot or the head. Old must be
2741 * a snapshot before new, in new's filesystem (or its origin). If not then
2742 * fail and return EINVAL.
2744 * The written space is calculated by considering two components: First, we
2745 * ignore any freed space, and calculate the written as new's used space
2746 * minus old's used space. Next, we add in the amount of space that was freed
2747 * between the two snapshots, thus reducing new's used space relative to old's.
2748 * Specifically, this is the space that was born before old->ds_creation_txg,
2749 * and freed before new (i.e. on new's deadlist or a previous deadlist).
2751 * space freed [---------------------]
2752 * snapshots ---O-------O--------O-------O------
/*
 * Accumulate into *usedp, *compp and *uncompp the space written between
 * 'oldsnap' and 'new'.
 *
 *   oldsnap - the older snapshot
 *   new     - the newer dataset; the walk starts at new->ds_object
 *   usedp, compp, uncompp - outputs for used, compressed and uncompressed
 *                           bytes
 *
 * The pool configuration must be held by the caller (asserted).  Each
 * output first receives new's byte count minus oldsnap's.  The loop then
 * follows ds_prev_snap_obj from 'new' back toward 'oldsnap', measuring each
 * dataset's deadlist: in full when its ds_prev_snap_txg equals oldsnap's
 * ds_creation_txg, otherwise only the range [0, oldsnap creation txg].
 *
 * NOTE(review): the initialisation of the outputs, the accumulation of the
 * per-deadlist results, the loop's error exits and the return statement are
 * not visible in this listing.
 */
2756 dsl_dataset_space_written(dsl_dataset_t
*oldsnap
, dsl_dataset_t
*new,
2757 uint64_t *usedp
, uint64_t *compp
, uint64_t *uncompp
)
2761 dsl_pool_t
*dp
= new->ds_dir
->dd_pool
;
2763 ASSERT(dsl_pool_config_held(dp
));
2766 *usedp
+= new->ds_phys
->ds_referenced_bytes
;
2767 *usedp
-= oldsnap
->ds_phys
->ds_referenced_bytes
;
2770 *compp
+= new->ds_phys
->ds_compressed_bytes
;
2771 *compp
-= oldsnap
->ds_phys
->ds_compressed_bytes
;
2774 *uncompp
+= new->ds_phys
->ds_uncompressed_bytes
;
2775 *uncompp
-= oldsnap
->ds_phys
->ds_uncompressed_bytes
;
/* Walk backwards from 'new' until 'oldsnap' is reached. */
2777 snapobj
= new->ds_object
;
2778 while (snapobj
!= oldsnap
->ds_object
) {
2779 dsl_dataset_t
*snap
;
2780 uint64_t used
, comp
, uncomp
;
2782 if (snapobj
== new->ds_object
) {
2785 err
= dsl_dataset_hold_obj(dp
, snapobj
, FTAG
, &snap
);
2790 if (snap
->ds_phys
->ds_prev_snap_txg
==
2791 oldsnap
->ds_phys
->ds_creation_txg
) {
2793 * The blocks in the deadlist can not be born after
2794 * ds_prev_snap_txg, so get the whole deadlist space,
2795 * which is more efficient (especially for old-format
2796 * deadlists). Unfortunately the deadlist code
2797 * doesn't have enough information to make this
2798 * optimization itself.
2800 dsl_deadlist_space(&snap
->ds_deadlist
,
2801 &used
, &comp
, &uncomp
);
2803 dsl_deadlist_space_range(&snap
->ds_deadlist
,
2804 0, oldsnap
->ds_phys
->ds_creation_txg
,
2805 &used
, &comp
, &uncomp
);
2812 * If we get to the beginning of the chain of snapshots
2813 * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
2814 * was not a snapshot of/before new.
2816 snapobj
= snap
->ds_phys
->ds_prev_snap_obj
;
2818 dsl_dataset_rele(snap
, FTAG
);
2820 err
= SET_ERROR(EINVAL
);
2829 * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
2830 * lastsnap, and all snapshots in between are deleted.
2832 * blocks that would be freed [---------------------------]
2833 * snapshots ---O-------O--------O-------O--------O
2834 * firstsnap lastsnap
2836 * This is the set of blocks that were born after the snap before firstsnap,
2837 * (birth > firstsnap->prev_snap_txg) and died before the snap after the
2838 * last snap (i.e., is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
2839 * We calculate this by iterating over the relevant deadlists (from the snap
2840 * after lastsnap, backward to the snap after firstsnap), summing up the
2841 * space on the deadlist that was born after the snap before firstsnap.
/*
 * Compute the space associated with the snapshot range
 * [firstsnap, lastsnap] by summing deadlist space.
 *
 *   firstsnap, lastsnap   - both asserted to be snapshots
 *   usedp, compp, uncompp - outputs; zeroed before the walk
 *
 * Returns EINVAL when the two snapshots are in different dsl_dirs or when
 * firstsnap's ds_creation_txg is greater than lastsnap's.  Otherwise the
 * loop starts at lastsnap's ds_next_snap_obj and follows ds_prev_snap_obj
 * until it reaches firstsnap, measuring on each dataset's deadlist the
 * range (firstsnap's ds_prev_snap_txg, UINT64_MAX).  Reaching object 0
 * during the walk trips an assertion.
 *
 * NOTE(review): the accumulation of the per-deadlist results, the handling
 * of a failed dsl_dataset_hold_obj() and the final return are not visible
 * in this listing.
 */
2844 dsl_dataset_space_wouldfree(dsl_dataset_t
*firstsnap
,
2845 dsl_dataset_t
*lastsnap
,
2846 uint64_t *usedp
, uint64_t *compp
, uint64_t *uncompp
)
2850 dsl_pool_t
*dp
= firstsnap
->ds_dir
->dd_pool
;
2852 ASSERT(dsl_dataset_is_snapshot(firstsnap
));
2853 ASSERT(dsl_dataset_is_snapshot(lastsnap
));
2856 * Check that the snapshots are in the same dsl_dir, and firstsnap
2857 * is before lastsnap.
2859 if (firstsnap
->ds_dir
!= lastsnap
->ds_dir
||
2860 firstsnap
->ds_phys
->ds_creation_txg
>
2861 lastsnap
->ds_phys
->ds_creation_txg
)
2862 return (SET_ERROR(EINVAL
));
2864 *usedp
= *compp
= *uncompp
= 0;
/* Start with the dataset after lastsnap and walk back toward firstsnap. */
2866 snapobj
= lastsnap
->ds_phys
->ds_next_snap_obj
;
2867 while (snapobj
!= firstsnap
->ds_object
) {
2869 uint64_t used
, comp
, uncomp
;
2871 err
= dsl_dataset_hold_obj(dp
, snapobj
, FTAG
, &ds
);
2875 dsl_deadlist_space_range(&ds
->ds_deadlist
,
2876 firstsnap
->ds_phys
->ds_prev_snap_txg
, UINT64_MAX
,
2877 &used
, &comp
, &uncomp
);
2882 snapobj
= ds
->ds_phys
->ds_prev_snap_obj
;
2883 ASSERT3U(snapobj
, !=, 0);
2884 dsl_dataset_rele(ds
, FTAG
);
2890 * Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
2891 * For example, they could both be snapshots of the same filesystem, and
2892 * 'earlier' is before 'later'. Or 'earlier' could be the origin of
2893 * 'later's filesystem. Or 'earlier' could be an older snapshot in the origin's
2894 * filesystem. Or 'earlier' could be the origin's origin.
/*
 * Decide whether 'earlier' lies in 'later's history.
 *
 *   later   - dataset whose timeline is examined
 *   earlier - candidate ancestor snapshot
 *
 * The pool configuration must be held by the caller (asserted).  The tests
 * run in this order: creation txg of 'earlier' not below that of 'later';
 * both in the same dsl_dir; 'later's dir not a clone; 'later's dir having
 * 'earlier' as its dd_origin_obj.  If none of these decides the answer, the
 * origin dataset is held and the function recurses with the origin in place
 * of 'later', releasing the hold afterwards.
 *
 * NOTE(review): the value returned by each early-exit test and the handling
 * of a failed dsl_dataset_hold_obj() are not visible in this listing.
 */
2897 dsl_dataset_is_before(dsl_dataset_t
*later
, dsl_dataset_t
*earlier
)
2899 dsl_pool_t
*dp
= later
->ds_dir
->dd_pool
;
2902 dsl_dataset_t
*origin
;
2904 ASSERT(dsl_pool_config_held(dp
));
2906 if (earlier
->ds_phys
->ds_creation_txg
>=
2907 later
->ds_phys
->ds_creation_txg
)
2910 if (later
->ds_dir
== earlier
->ds_dir
)
2912 if (!dsl_dir_is_clone(later
->ds_dir
))
2915 if (later
->ds_dir
->dd_phys
->dd_origin_obj
== earlier
->ds_object
)
/* Not decided yet: repeat the question one level up the origin chain. */
2917 error
= dsl_dataset_hold_obj(dp
,
2918 later
->ds_dir
->dd_phys
->dd_origin_obj
, FTAG
, &origin
);
2921 ret
= dsl_dataset_is_before(origin
, earlier
);
2922 dsl_dataset_rele(origin
, FTAG
);
2926 #if defined(_KERNEL) && defined(HAVE_SPL)
2927 EXPORT_SYMBOL(dsl_dataset_hold
);
2928 EXPORT_SYMBOL(dsl_dataset_hold_obj
);
2929 EXPORT_SYMBOL(dsl_dataset_own
);
2930 EXPORT_SYMBOL(dsl_dataset_own_obj
);
2931 EXPORT_SYMBOL(dsl_dataset_name
);
2932 EXPORT_SYMBOL(dsl_dataset_rele
);
2933 EXPORT_SYMBOL(dsl_dataset_disown
);
2934 EXPORT_SYMBOL(dsl_dataset_tryown
);
2935 EXPORT_SYMBOL(dsl_dataset_create_sync
);
2936 EXPORT_SYMBOL(dsl_dataset_create_sync_dd
);
2937 EXPORT_SYMBOL(dsl_dataset_snapshot_check
);
2938 EXPORT_SYMBOL(dsl_dataset_snapshot_sync
);
2939 EXPORT_SYMBOL(dsl_dataset_promote
);
2940 EXPORT_SYMBOL(dsl_dataset_user_hold
);
2941 EXPORT_SYMBOL(dsl_dataset_user_release
);
2942 EXPORT_SYMBOL(dsl_dataset_get_holds
);
2943 EXPORT_SYMBOL(dsl_dataset_get_blkptr
);
2944 EXPORT_SYMBOL(dsl_dataset_set_blkptr
);
2945 EXPORT_SYMBOL(dsl_dataset_get_spa
);
2946 EXPORT_SYMBOL(dsl_dataset_modified_since_lastsnap
);
2947 EXPORT_SYMBOL(dsl_dataset_space_written
);
2948 EXPORT_SYMBOL(dsl_dataset_space_wouldfree
);
2949 EXPORT_SYMBOL(dsl_dataset_sync
);
2950 EXPORT_SYMBOL(dsl_dataset_block_born
);
2951 EXPORT_SYMBOL(dsl_dataset_block_kill
);
2952 EXPORT_SYMBOL(dsl_dataset_block_freeable
);
2953 EXPORT_SYMBOL(dsl_dataset_prev_snap_txg
);
2954 EXPORT_SYMBOL(dsl_dataset_dirty
);
2955 EXPORT_SYMBOL(dsl_dataset_stats
);
2956 EXPORT_SYMBOL(dsl_dataset_fast_stat
);
2957 EXPORT_SYMBOL(dsl_dataset_space
);
2958 EXPORT_SYMBOL(dsl_dataset_fsid_guid
);
2959 EXPORT_SYMBOL(dsl_dsobj_to_dsname
);
2960 EXPORT_SYMBOL(dsl_dataset_check_quota
);
2961 EXPORT_SYMBOL(dsl_dataset_clone_swap_check_impl
);
2962 EXPORT_SYMBOL(dsl_dataset_clone_swap_sync_impl
);