4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
27 /* Portions Copyright 2010 Robert Milkowski */
30 #include <sys/zfs_context.h>
31 #include <sys/dmu_objset.h>
32 #include <sys/dsl_dir.h>
33 #include <sys/dsl_dataset.h>
34 #include <sys/dsl_prop.h>
35 #include <sys/dsl_pool.h>
36 #include <sys/dsl_synctask.h>
37 #include <sys/dsl_deleg.h>
38 #include <sys/dnode.h>
41 #include <sys/dmu_tx.h>
44 #include <sys/dmu_impl.h>
45 #include <sys/zfs_ioctl.h>
47 #include <sys/zfs_onexit.h>
48 #include <sys/dsl_destroy.h>
51 * Needed to close a window in dnode_move() that allows the objset to be freed
52 * before it can be safely accessed.
59 rw_init(&os_lock
, NULL
, RW_DEFAULT
, NULL
);
69 dmu_objset_spa(objset_t
*os
)
75 dmu_objset_zil(objset_t
*os
)
81 dmu_objset_pool(objset_t
*os
)
85 if ((ds
= os
->os_dsl_dataset
) != NULL
&& ds
->ds_dir
)
86 return (ds
->ds_dir
->dd_pool
);
88 return (spa_get_dsl(os
->os_spa
));
92 dmu_objset_ds(objset_t
*os
)
94 return (os
->os_dsl_dataset
);
98 dmu_objset_type(objset_t
*os
)
100 return (os
->os_phys
->os_type
);
104 dmu_objset_name(objset_t
*os
, char *buf
)
106 dsl_dataset_name(os
->os_dsl_dataset
, buf
);
110 dmu_objset_id(objset_t
*os
)
112 dsl_dataset_t
*ds
= os
->os_dsl_dataset
;
114 return (ds
? ds
->ds_object
: 0);
118 dmu_objset_syncprop(objset_t
*os
)
120 return (os
->os_sync
);
124 dmu_objset_logbias(objset_t
*os
)
126 return (os
->os_logbias
);
130 checksum_changed_cb(void *arg
, uint64_t newval
)
135 * Inheritance should have been done by now.
137 ASSERT(newval
!= ZIO_CHECKSUM_INHERIT
);
139 os
->os_checksum
= zio_checksum_select(newval
, ZIO_CHECKSUM_ON_VALUE
);
143 compression_changed_cb(void *arg
, uint64_t newval
)
148 * Inheritance and range checking should have been done by now.
150 ASSERT(newval
!= ZIO_COMPRESS_INHERIT
);
152 os
->os_compress
= zio_compress_select(newval
, ZIO_COMPRESS_ON_VALUE
);
156 copies_changed_cb(void *arg
, uint64_t newval
)
161 * Inheritance and range checking should have been done by now.
164 ASSERT(newval
<= spa_max_replication(os
->os_spa
));
166 os
->os_copies
= newval
;
170 dedup_changed_cb(void *arg
, uint64_t newval
)
173 spa_t
*spa
= os
->os_spa
;
174 enum zio_checksum checksum
;
177 * Inheritance should have been done by now.
179 ASSERT(newval
!= ZIO_CHECKSUM_INHERIT
);
181 checksum
= zio_checksum_dedup_select(spa
, newval
, ZIO_CHECKSUM_OFF
);
183 os
->os_dedup_checksum
= checksum
& ZIO_CHECKSUM_MASK
;
184 os
->os_dedup_verify
= !!(checksum
& ZIO_CHECKSUM_VERIFY
);
188 primary_cache_changed_cb(void *arg
, uint64_t newval
)
193 * Inheritance and range checking should have been done by now.
195 ASSERT(newval
== ZFS_CACHE_ALL
|| newval
== ZFS_CACHE_NONE
||
196 newval
== ZFS_CACHE_METADATA
);
198 os
->os_primary_cache
= newval
;
202 secondary_cache_changed_cb(void *arg
, uint64_t newval
)
207 * Inheritance and range checking should have been done by now.
209 ASSERT(newval
== ZFS_CACHE_ALL
|| newval
== ZFS_CACHE_NONE
||
210 newval
== ZFS_CACHE_METADATA
);
212 os
->os_secondary_cache
= newval
;
216 sync_changed_cb(void *arg
, uint64_t newval
)
221 * Inheritance and range checking should have been done by now.
223 ASSERT(newval
== ZFS_SYNC_STANDARD
|| newval
== ZFS_SYNC_ALWAYS
||
224 newval
== ZFS_SYNC_DISABLED
);
226 os
->os_sync
= newval
;
228 zil_set_sync(os
->os_zil
, newval
);
232 logbias_changed_cb(void *arg
, uint64_t newval
)
236 ASSERT(newval
== ZFS_LOGBIAS_LATENCY
||
237 newval
== ZFS_LOGBIAS_THROUGHPUT
);
238 os
->os_logbias
= newval
;
240 zil_set_logbias(os
->os_zil
, newval
);
244 dmu_objset_byteswap(void *buf
, size_t size
)
246 objset_phys_t
*osp
= buf
;
248 ASSERT(size
== OBJSET_OLD_PHYS_SIZE
|| size
== sizeof (objset_phys_t
));
249 dnode_byteswap(&osp
->os_meta_dnode
);
250 byteswap_uint64_array(&osp
->os_zil_header
, sizeof (zil_header_t
));
251 osp
->os_type
= BSWAP_64(osp
->os_type
);
252 osp
->os_flags
= BSWAP_64(osp
->os_flags
);
253 if (size
== sizeof (objset_phys_t
)) {
254 dnode_byteswap(&osp
->os_userused_dnode
);
255 dnode_byteswap(&osp
->os_groupused_dnode
);
260 dmu_objset_open_impl(spa_t
*spa
, dsl_dataset_t
*ds
, blkptr_t
*bp
,
266 ASSERT(ds
== NULL
|| MUTEX_HELD(&ds
->ds_opening_lock
));
268 os
= kmem_zalloc(sizeof (objset_t
), KM_PUSHPAGE
);
269 os
->os_dsl_dataset
= ds
;
272 if (!BP_IS_HOLE(os
->os_rootbp
)) {
273 uint32_t aflags
= ARC_WAIT
;
275 SET_BOOKMARK(&zb
, ds
? ds
->ds_object
: DMU_META_OBJSET
,
276 ZB_ROOT_OBJECT
, ZB_ROOT_LEVEL
, ZB_ROOT_BLKID
);
278 if (DMU_OS_IS_L2CACHEABLE(os
))
279 aflags
|= ARC_L2CACHE
;
280 if (DMU_OS_IS_L2COMPRESSIBLE(os
))
281 aflags
|= ARC_L2COMPRESS
;
283 dprintf_bp(os
->os_rootbp
, "reading %s", "");
284 err
= arc_read(NULL
, spa
, os
->os_rootbp
,
285 arc_getbuf_func
, &os
->os_phys_buf
,
286 ZIO_PRIORITY_SYNC_READ
, ZIO_FLAG_CANFAIL
, &aflags
, &zb
);
288 kmem_free(os
, sizeof (objset_t
));
289 /* convert checksum errors into IO errors */
291 err
= SET_ERROR(EIO
);
295 /* Increase the blocksize if we are permitted. */
296 if (spa_version(spa
) >= SPA_VERSION_USERSPACE
&&
297 arc_buf_size(os
->os_phys_buf
) < sizeof (objset_phys_t
)) {
298 arc_buf_t
*buf
= arc_buf_alloc(spa
,
299 sizeof (objset_phys_t
), &os
->os_phys_buf
,
301 bzero(buf
->b_data
, sizeof (objset_phys_t
));
302 bcopy(os
->os_phys_buf
->b_data
, buf
->b_data
,
303 arc_buf_size(os
->os_phys_buf
));
304 (void) arc_buf_remove_ref(os
->os_phys_buf
,
306 os
->os_phys_buf
= buf
;
309 os
->os_phys
= os
->os_phys_buf
->b_data
;
310 os
->os_flags
= os
->os_phys
->os_flags
;
312 int size
= spa_version(spa
) >= SPA_VERSION_USERSPACE
?
313 sizeof (objset_phys_t
) : OBJSET_OLD_PHYS_SIZE
;
314 os
->os_phys_buf
= arc_buf_alloc(spa
, size
,
315 &os
->os_phys_buf
, ARC_BUFC_METADATA
);
316 os
->os_phys
= os
->os_phys_buf
->b_data
;
317 bzero(os
->os_phys
, size
);
321 * Note: the changed_cb will be called once before the register
322 * func returns, thus changing the checksum/compression from the
323 * default (fletcher2/off). Snapshots don't need to know about
324 * checksum/compression/copies.
327 err
= dsl_prop_register(ds
,
328 zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE
),
329 primary_cache_changed_cb
, os
);
331 err
= dsl_prop_register(ds
,
332 zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE
),
333 secondary_cache_changed_cb
, os
);
335 if (!dsl_dataset_is_snapshot(ds
)) {
337 err
= dsl_prop_register(ds
,
338 zfs_prop_to_name(ZFS_PROP_CHECKSUM
),
339 checksum_changed_cb
, os
);
342 err
= dsl_prop_register(ds
,
343 zfs_prop_to_name(ZFS_PROP_COMPRESSION
),
344 compression_changed_cb
, os
);
347 err
= dsl_prop_register(ds
,
348 zfs_prop_to_name(ZFS_PROP_COPIES
),
349 copies_changed_cb
, os
);
352 err
= dsl_prop_register(ds
,
353 zfs_prop_to_name(ZFS_PROP_DEDUP
),
354 dedup_changed_cb
, os
);
357 err
= dsl_prop_register(ds
,
358 zfs_prop_to_name(ZFS_PROP_LOGBIAS
),
359 logbias_changed_cb
, os
);
362 err
= dsl_prop_register(ds
,
363 zfs_prop_to_name(ZFS_PROP_SYNC
),
364 sync_changed_cb
, os
);
368 VERIFY(arc_buf_remove_ref(os
->os_phys_buf
,
370 kmem_free(os
, sizeof (objset_t
));
373 } else if (ds
== NULL
) {
374 /* It's the meta-objset. */
375 os
->os_checksum
= ZIO_CHECKSUM_FLETCHER_4
;
376 os
->os_compress
= ZIO_COMPRESS_LZJB
;
377 os
->os_copies
= spa_max_replication(spa
);
378 os
->os_dedup_checksum
= ZIO_CHECKSUM_OFF
;
379 os
->os_dedup_verify
= 0;
382 os
->os_primary_cache
= ZFS_CACHE_ALL
;
383 os
->os_secondary_cache
= ZFS_CACHE_ALL
;
386 if (ds
== NULL
|| !dsl_dataset_is_snapshot(ds
))
387 os
->os_zil_header
= os
->os_phys
->os_zil_header
;
388 os
->os_zil
= zil_alloc(os
, &os
->os_zil_header
);
390 for (i
= 0; i
< TXG_SIZE
; i
++) {
391 list_create(&os
->os_dirty_dnodes
[i
], sizeof (dnode_t
),
392 offsetof(dnode_t
, dn_dirty_link
[i
]));
393 list_create(&os
->os_free_dnodes
[i
], sizeof (dnode_t
),
394 offsetof(dnode_t
, dn_dirty_link
[i
]));
396 list_create(&os
->os_dnodes
, sizeof (dnode_t
),
397 offsetof(dnode_t
, dn_link
));
398 list_create(&os
->os_downgraded_dbufs
, sizeof (dmu_buf_impl_t
),
399 offsetof(dmu_buf_impl_t
, db_link
));
401 mutex_init(&os
->os_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
402 mutex_init(&os
->os_obj_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
403 mutex_init(&os
->os_user_ptr_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
405 DMU_META_DNODE(os
) = dnode_special_open(os
,
406 &os
->os_phys
->os_meta_dnode
, DMU_META_DNODE_OBJECT
,
408 if (arc_buf_size(os
->os_phys_buf
) >= sizeof (objset_phys_t
)) {
409 DMU_USERUSED_DNODE(os
) = dnode_special_open(os
,
410 &os
->os_phys
->os_userused_dnode
, DMU_USERUSED_OBJECT
,
411 &os
->os_userused_dnode
);
412 DMU_GROUPUSED_DNODE(os
) = dnode_special_open(os
,
413 &os
->os_phys
->os_groupused_dnode
, DMU_GROUPUSED_OBJECT
,
414 &os
->os_groupused_dnode
);
418 * We should be the only thread trying to do this because we
419 * have ds_opening_lock
422 mutex_enter(&ds
->ds_lock
);
423 ASSERT(ds
->ds_objset
== NULL
);
425 mutex_exit(&ds
->ds_lock
);
433 dmu_objset_from_ds(dsl_dataset_t
*ds
, objset_t
**osp
)
437 mutex_enter(&ds
->ds_opening_lock
);
438 *osp
= ds
->ds_objset
;
440 err
= dmu_objset_open_impl(dsl_dataset_get_spa(ds
),
441 ds
, dsl_dataset_get_blkptr(ds
), osp
);
443 mutex_exit(&ds
->ds_opening_lock
);
448 * Holds the pool while the objset is held. Therefore only one objset
449 * can be held at a time.
452 dmu_objset_hold(const char *name
, void *tag
, objset_t
**osp
)
458 err
= dsl_pool_hold(name
, tag
, &dp
);
461 err
= dsl_dataset_hold(dp
, name
, tag
, &ds
);
463 dsl_pool_rele(dp
, tag
);
467 err
= dmu_objset_from_ds(ds
, osp
);
469 dsl_dataset_rele(ds
, tag
);
470 dsl_pool_rele(dp
, tag
);
477 * dsl_pool must not be held when this is called.
478 * Upon successful return, there will be a longhold on the dataset,
479 * and the dsl_pool will not be held.
482 dmu_objset_own(const char *name
, dmu_objset_type_t type
,
483 boolean_t readonly
, void *tag
, objset_t
**osp
)
489 err
= dsl_pool_hold(name
, FTAG
, &dp
);
492 err
= dsl_dataset_own(dp
, name
, tag
, &ds
);
494 dsl_pool_rele(dp
, FTAG
);
498 err
= dmu_objset_from_ds(ds
, osp
);
499 dsl_pool_rele(dp
, FTAG
);
501 dsl_dataset_disown(ds
, tag
);
502 } else if (type
!= DMU_OST_ANY
&& type
!= (*osp
)->os_phys
->os_type
) {
503 dsl_dataset_disown(ds
, tag
);
504 return (SET_ERROR(EINVAL
));
505 } else if (!readonly
&& dsl_dataset_is_snapshot(ds
)) {
506 dsl_dataset_disown(ds
, tag
);
507 return (SET_ERROR(EROFS
));
513 dmu_objset_rele(objset_t
*os
, void *tag
)
515 dsl_pool_t
*dp
= dmu_objset_pool(os
);
516 dsl_dataset_rele(os
->os_dsl_dataset
, tag
);
517 dsl_pool_rele(dp
, tag
);
521 * When we are called, os MUST refer to an objset associated with a dataset
522 * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
523 * == tag. We will then release and reacquire ownership of the dataset while
524 * holding the pool config_rwlock to avoid intervening namespace or ownership
527 * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
528 * release the hold on its dataset and acquire a new one on the dataset of the
529 * same name so that it can be partially torn down and reconstructed.
532 dmu_objset_refresh_ownership(objset_t
*os
, void *tag
)
535 dsl_dataset_t
*ds
, *newds
;
536 char name
[MAXNAMELEN
];
538 ds
= os
->os_dsl_dataset
;
539 VERIFY3P(ds
, !=, NULL
);
540 VERIFY3P(ds
->ds_owner
, ==, tag
);
541 VERIFY(dsl_dataset_long_held(ds
));
543 dsl_dataset_name(ds
, name
);
544 dp
= dmu_objset_pool(os
);
545 dsl_pool_config_enter(dp
, FTAG
);
546 dmu_objset_disown(os
, tag
);
547 VERIFY0(dsl_dataset_own(dp
, name
, tag
, &newds
));
548 VERIFY3P(newds
, ==, os
->os_dsl_dataset
);
549 dsl_pool_config_exit(dp
, FTAG
);
553 dmu_objset_disown(objset_t
*os
, void *tag
)
555 dsl_dataset_disown(os
->os_dsl_dataset
, tag
);
559 dmu_objset_evict_dbufs(objset_t
*os
)
563 mutex_enter(&os
->os_lock
);
565 /* process the mdn last, since the other dnodes have holds on it */
566 list_remove(&os
->os_dnodes
, DMU_META_DNODE(os
));
567 list_insert_tail(&os
->os_dnodes
, DMU_META_DNODE(os
));
570 * Find the first dnode with holds. We have to do this dance
571 * because dnode_add_ref() only works if you already have a
572 * hold. If there are no holds then it has no dbufs so OK to
575 for (dn
= list_head(&os
->os_dnodes
);
576 dn
&& !dnode_add_ref(dn
, FTAG
);
577 dn
= list_next(&os
->os_dnodes
, dn
))
581 dnode_t
*next_dn
= dn
;
584 next_dn
= list_next(&os
->os_dnodes
, next_dn
);
585 } while (next_dn
&& !dnode_add_ref(next_dn
, FTAG
));
587 mutex_exit(&os
->os_lock
);
588 dnode_evict_dbufs(dn
);
589 dnode_rele(dn
, FTAG
);
590 mutex_enter(&os
->os_lock
);
593 mutex_exit(&os
->os_lock
);
597 dmu_objset_evict(objset_t
*os
)
601 dsl_dataset_t
*ds
= os
->os_dsl_dataset
;
603 for (t
= 0; t
< TXG_SIZE
; t
++)
604 ASSERT(!dmu_objset_is_dirty(os
, t
));
607 if (!dsl_dataset_is_snapshot(ds
)) {
608 VERIFY0(dsl_prop_unregister(ds
,
609 zfs_prop_to_name(ZFS_PROP_CHECKSUM
),
610 checksum_changed_cb
, os
));
611 VERIFY0(dsl_prop_unregister(ds
,
612 zfs_prop_to_name(ZFS_PROP_COMPRESSION
),
613 compression_changed_cb
, os
));
614 VERIFY0(dsl_prop_unregister(ds
,
615 zfs_prop_to_name(ZFS_PROP_COPIES
),
616 copies_changed_cb
, os
));
617 VERIFY0(dsl_prop_unregister(ds
,
618 zfs_prop_to_name(ZFS_PROP_DEDUP
),
619 dedup_changed_cb
, os
));
620 VERIFY0(dsl_prop_unregister(ds
,
621 zfs_prop_to_name(ZFS_PROP_LOGBIAS
),
622 logbias_changed_cb
, os
));
623 VERIFY0(dsl_prop_unregister(ds
,
624 zfs_prop_to_name(ZFS_PROP_SYNC
),
625 sync_changed_cb
, os
));
627 VERIFY0(dsl_prop_unregister(ds
,
628 zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE
),
629 primary_cache_changed_cb
, os
));
630 VERIFY0(dsl_prop_unregister(ds
,
631 zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE
),
632 secondary_cache_changed_cb
, os
));
638 dmu_objset_evict_dbufs(os
);
640 dnode_special_close(&os
->os_meta_dnode
);
641 if (DMU_USERUSED_DNODE(os
)) {
642 dnode_special_close(&os
->os_userused_dnode
);
643 dnode_special_close(&os
->os_groupused_dnode
);
645 zil_free(os
->os_zil
);
647 ASSERT3P(list_head(&os
->os_dnodes
), ==, NULL
);
649 VERIFY(arc_buf_remove_ref(os
->os_phys_buf
, &os
->os_phys_buf
));
652 * This is a barrier to prevent the objset from going away in
653 * dnode_move() until we can safely ensure that the objset is still in
654 * use. We consider the objset valid before the barrier and invalid
657 rw_enter(&os_lock
, RW_READER
);
660 mutex_destroy(&os
->os_lock
);
661 mutex_destroy(&os
->os_obj_lock
);
662 mutex_destroy(&os
->os_user_ptr_lock
);
663 kmem_free(os
, sizeof (objset_t
));
667 dmu_objset_snap_cmtime(objset_t
*os
)
669 return (dsl_dir_snap_cmtime(os
->os_dsl_dataset
->ds_dir
));
672 /* called from dsl for meta-objset */
674 dmu_objset_create_impl(spa_t
*spa
, dsl_dataset_t
*ds
, blkptr_t
*bp
,
675 dmu_objset_type_t type
, dmu_tx_t
*tx
)
680 ASSERT(dmu_tx_is_syncing(tx
));
683 VERIFY0(dmu_objset_from_ds(ds
, &os
));
685 VERIFY0(dmu_objset_open_impl(spa
, NULL
, bp
, &os
));
687 mdn
= DMU_META_DNODE(os
);
689 dnode_allocate(mdn
, DMU_OT_DNODE
, 1 << DNODE_BLOCK_SHIFT
,
690 DN_MAX_INDBLKSHIFT
, DMU_OT_NONE
, 0, tx
);
693 * We don't want to have to increase the meta-dnode's nlevels
694 * later, because then we could do it in quescing context while
695 * we are also accessing it in open context.
697 * This precaution is not necessary for the MOS (ds == NULL),
698 * because the MOS is only updated in syncing context.
699 * This is most fortunate: the MOS is the only objset that
700 * needs to be synced multiple times as spa_sync() iterates
701 * to convergence, so minimizing its dn_nlevels matters.
707 * Determine the number of levels necessary for the meta-dnode
708 * to contain DN_MAX_OBJECT dnodes.
710 while ((uint64_t)mdn
->dn_nblkptr
<< (mdn
->dn_datablkshift
+
711 (levels
- 1) * (mdn
->dn_indblkshift
- SPA_BLKPTRSHIFT
)) <
712 DN_MAX_OBJECT
* sizeof (dnode_phys_t
))
715 mdn
->dn_next_nlevels
[tx
->tx_txg
& TXG_MASK
] =
716 mdn
->dn_nlevels
= levels
;
719 ASSERT(type
!= DMU_OST_NONE
);
720 ASSERT(type
!= DMU_OST_ANY
);
721 ASSERT(type
< DMU_OST_NUMTYPES
);
722 os
->os_phys
->os_type
= type
;
723 if (dmu_objset_userused_enabled(os
)) {
724 os
->os_phys
->os_flags
|= OBJSET_FLAG_USERACCOUNTING_COMPLETE
;
725 os
->os_flags
= os
->os_phys
->os_flags
;
728 dsl_dataset_dirty(ds
, tx
);
733 typedef struct dmu_objset_create_arg
{
734 const char *doca_name
;
736 void (*doca_userfunc
)(objset_t
*os
, void *arg
,
737 cred_t
*cr
, dmu_tx_t
*tx
);
739 dmu_objset_type_t doca_type
;
741 } dmu_objset_create_arg_t
;
745 dmu_objset_create_check(void *arg
, dmu_tx_t
*tx
)
747 dmu_objset_create_arg_t
*doca
= arg
;
748 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
753 if (strchr(doca
->doca_name
, '@') != NULL
)
754 return (SET_ERROR(EINVAL
));
756 error
= dsl_dir_hold(dp
, doca
->doca_name
, FTAG
, &pdd
, &tail
);
760 dsl_dir_rele(pdd
, FTAG
);
761 return (SET_ERROR(EEXIST
));
763 dsl_dir_rele(pdd
, FTAG
);
769 dmu_objset_create_sync(void *arg
, dmu_tx_t
*tx
)
771 dmu_objset_create_arg_t
*doca
= arg
;
772 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
780 VERIFY0(dsl_dir_hold(dp
, doca
->doca_name
, FTAG
, &pdd
, &tail
));
782 obj
= dsl_dataset_create_sync(pdd
, tail
, NULL
, doca
->doca_flags
,
783 doca
->doca_cred
, tx
);
785 VERIFY0(dsl_dataset_hold_obj(pdd
->dd_pool
, obj
, FTAG
, &ds
));
786 bp
= dsl_dataset_get_blkptr(ds
);
787 os
= dmu_objset_create_impl(pdd
->dd_pool
->dp_spa
,
788 ds
, bp
, doca
->doca_type
, tx
);
790 if (doca
->doca_userfunc
!= NULL
) {
791 doca
->doca_userfunc(os
, doca
->doca_userarg
,
792 doca
->doca_cred
, tx
);
795 spa_history_log_internal_ds(ds
, "create", tx
, "");
796 dsl_dataset_rele(ds
, FTAG
);
797 dsl_dir_rele(pdd
, FTAG
);
801 dmu_objset_create(const char *name
, dmu_objset_type_t type
, uint64_t flags
,
802 void (*func
)(objset_t
*os
, void *arg
, cred_t
*cr
, dmu_tx_t
*tx
), void *arg
)
804 dmu_objset_create_arg_t doca
;
806 doca
.doca_name
= name
;
807 doca
.doca_cred
= CRED();
808 doca
.doca_flags
= flags
;
809 doca
.doca_userfunc
= func
;
810 doca
.doca_userarg
= arg
;
811 doca
.doca_type
= type
;
813 return (dsl_sync_task(name
,
814 dmu_objset_create_check
, dmu_objset_create_sync
, &doca
, 5));
817 typedef struct dmu_objset_clone_arg
{
818 const char *doca_clone
;
819 const char *doca_origin
;
821 } dmu_objset_clone_arg_t
;
825 dmu_objset_clone_check(void *arg
, dmu_tx_t
*tx
)
827 dmu_objset_clone_arg_t
*doca
= arg
;
831 dsl_dataset_t
*origin
;
832 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
834 if (strchr(doca
->doca_clone
, '@') != NULL
)
835 return (SET_ERROR(EINVAL
));
837 error
= dsl_dir_hold(dp
, doca
->doca_clone
, FTAG
, &pdd
, &tail
);
841 dsl_dir_rele(pdd
, FTAG
);
842 return (SET_ERROR(EEXIST
));
844 /* You can't clone across pools. */
845 if (pdd
->dd_pool
!= dp
) {
846 dsl_dir_rele(pdd
, FTAG
);
847 return (SET_ERROR(EXDEV
));
849 dsl_dir_rele(pdd
, FTAG
);
851 error
= dsl_dataset_hold(dp
, doca
->doca_origin
, FTAG
, &origin
);
855 /* You can't clone across pools. */
856 if (origin
->ds_dir
->dd_pool
!= dp
) {
857 dsl_dataset_rele(origin
, FTAG
);
858 return (SET_ERROR(EXDEV
));
861 /* You can only clone snapshots, not the head datasets. */
862 if (!dsl_dataset_is_snapshot(origin
)) {
863 dsl_dataset_rele(origin
, FTAG
);
864 return (SET_ERROR(EINVAL
));
866 dsl_dataset_rele(origin
, FTAG
);
872 dmu_objset_clone_sync(void *arg
, dmu_tx_t
*tx
)
874 dmu_objset_clone_arg_t
*doca
= arg
;
875 dsl_pool_t
*dp
= dmu_tx_pool(tx
);
878 dsl_dataset_t
*origin
, *ds
;
880 char namebuf
[MAXNAMELEN
];
882 VERIFY0(dsl_dir_hold(dp
, doca
->doca_clone
, FTAG
, &pdd
, &tail
));
883 VERIFY0(dsl_dataset_hold(dp
, doca
->doca_origin
, FTAG
, &origin
));
885 obj
= dsl_dataset_create_sync(pdd
, tail
, origin
, 0,
886 doca
->doca_cred
, tx
);
888 VERIFY0(dsl_dataset_hold_obj(pdd
->dd_pool
, obj
, FTAG
, &ds
));
889 dsl_dataset_name(origin
, namebuf
);
890 spa_history_log_internal_ds(ds
, "clone", tx
,
891 "origin=%s (%llu)", namebuf
, origin
->ds_object
);
892 dsl_dataset_rele(ds
, FTAG
);
893 dsl_dataset_rele(origin
, FTAG
);
894 dsl_dir_rele(pdd
, FTAG
);
898 dmu_objset_clone(const char *clone
, const char *origin
)
900 dmu_objset_clone_arg_t doca
;
902 doca
.doca_clone
= clone
;
903 doca
.doca_origin
= origin
;
904 doca
.doca_cred
= CRED();
906 return (dsl_sync_task(clone
,
907 dmu_objset_clone_check
, dmu_objset_clone_sync
, &doca
, 5));
911 dmu_objset_snapshot_one(const char *fsname
, const char *snapname
)
914 char *longsnap
= kmem_asprintf("%s@%s", fsname
, snapname
);
915 nvlist_t
*snaps
= fnvlist_alloc();
917 fnvlist_add_boolean(snaps
, longsnap
);
919 err
= dsl_dataset_snapshot(snaps
, NULL
, NULL
);
925 dmu_objset_sync_dnodes(list_t
*list
, list_t
*newlist
, dmu_tx_t
*tx
)
929 while ((dn
= list_head(list
))) {
930 ASSERT(dn
->dn_object
!= DMU_META_DNODE_OBJECT
);
931 ASSERT(dn
->dn_dbuf
->db_data_pending
);
933 * Initialize dn_zio outside dnode_sync() because the
934 * meta-dnode needs to set it ouside dnode_sync().
936 dn
->dn_zio
= dn
->dn_dbuf
->db_data_pending
->dr_zio
;
939 ASSERT3U(dn
->dn_nlevels
, <=, DN_MAX_LEVELS
);
940 list_remove(list
, dn
);
943 (void) dnode_add_ref(dn
, newlist
);
944 list_insert_tail(newlist
, dn
);
953 dmu_objset_write_ready(zio_t
*zio
, arc_buf_t
*abuf
, void *arg
)
957 blkptr_t
*bp
= zio
->io_bp
;
959 dnode_phys_t
*dnp
= &os
->os_phys
->os_meta_dnode
;
961 ASSERT3P(bp
, ==, os
->os_rootbp
);
962 ASSERT3U(BP_GET_TYPE(bp
), ==, DMU_OT_OBJSET
);
963 ASSERT0(BP_GET_LEVEL(bp
));
966 * Update rootbp fill count: it should be the number of objects
967 * allocated in the object set (not counting the "special"
968 * objects that are stored in the objset_phys_t -- the meta
969 * dnode and user/group accounting objects).
972 for (i
= 0; i
< dnp
->dn_nblkptr
; i
++)
973 bp
->blk_fill
+= dnp
->dn_blkptr
[i
].blk_fill
;
978 dmu_objset_write_done(zio_t
*zio
, arc_buf_t
*abuf
, void *arg
)
980 blkptr_t
*bp
= zio
->io_bp
;
981 blkptr_t
*bp_orig
= &zio
->io_bp_orig
;
984 if (zio
->io_flags
& ZIO_FLAG_IO_REWRITE
) {
985 ASSERT(BP_EQUAL(bp
, bp_orig
));
987 dsl_dataset_t
*ds
= os
->os_dsl_dataset
;
988 dmu_tx_t
*tx
= os
->os_synctx
;
990 (void) dsl_dataset_block_kill(ds
, bp_orig
, tx
, B_TRUE
);
991 dsl_dataset_block_born(ds
, bp
, tx
);
995 /* called from dsl */
997 dmu_objset_sync(objset_t
*os
, zio_t
*pio
, dmu_tx_t
*tx
)
1004 list_t
*newlist
= NULL
;
1005 dbuf_dirty_record_t
*dr
;
1007 dprintf_ds(os
->os_dsl_dataset
, "txg=%llu\n", tx
->tx_txg
);
1009 ASSERT(dmu_tx_is_syncing(tx
));
1010 /* XXX the write_done callback should really give us the tx... */
1013 if (os
->os_dsl_dataset
== NULL
) {
1015 * This is the MOS. If we have upgraded,
1016 * spa_max_replication() could change, so reset
1019 os
->os_copies
= spa_max_replication(os
->os_spa
);
1023 * Create the root block IO
1025 SET_BOOKMARK(&zb
, os
->os_dsl_dataset
?
1026 os
->os_dsl_dataset
->ds_object
: DMU_META_OBJSET
,
1027 ZB_ROOT_OBJECT
, ZB_ROOT_LEVEL
, ZB_ROOT_BLKID
);
1028 arc_release(os
->os_phys_buf
, &os
->os_phys_buf
);
1030 dmu_write_policy(os
, NULL
, 0, 0, &zp
);
1032 zio
= arc_write(pio
, os
->os_spa
, tx
->tx_txg
,
1033 os
->os_rootbp
, os
->os_phys_buf
, DMU_OS_IS_L2CACHEABLE(os
),
1034 DMU_OS_IS_L2COMPRESSIBLE(os
), &zp
, dmu_objset_write_ready
,
1035 NULL
, dmu_objset_write_done
, os
, ZIO_PRIORITY_ASYNC_WRITE
,
1036 ZIO_FLAG_MUSTSUCCEED
, &zb
);
1039 * Sync special dnodes - the parent IO for the sync is the root block
1041 DMU_META_DNODE(os
)->dn_zio
= zio
;
1042 dnode_sync(DMU_META_DNODE(os
), tx
);
1044 os
->os_phys
->os_flags
= os
->os_flags
;
1046 if (DMU_USERUSED_DNODE(os
) &&
1047 DMU_USERUSED_DNODE(os
)->dn_type
!= DMU_OT_NONE
) {
1048 DMU_USERUSED_DNODE(os
)->dn_zio
= zio
;
1049 dnode_sync(DMU_USERUSED_DNODE(os
), tx
);
1050 DMU_GROUPUSED_DNODE(os
)->dn_zio
= zio
;
1051 dnode_sync(DMU_GROUPUSED_DNODE(os
), tx
);
1054 txgoff
= tx
->tx_txg
& TXG_MASK
;
1056 if (dmu_objset_userused_enabled(os
)) {
1057 newlist
= &os
->os_synced_dnodes
;
1059 * We must create the list here because it uses the
1060 * dn_dirty_link[] of this txg.
1062 list_create(newlist
, sizeof (dnode_t
),
1063 offsetof(dnode_t
, dn_dirty_link
[txgoff
]));
1066 dmu_objset_sync_dnodes(&os
->os_free_dnodes
[txgoff
], newlist
, tx
);
1067 dmu_objset_sync_dnodes(&os
->os_dirty_dnodes
[txgoff
], newlist
, tx
);
1069 list
= &DMU_META_DNODE(os
)->dn_dirty_records
[txgoff
];
1070 while ((dr
= list_head(list
))) {
1071 ASSERT0(dr
->dr_dbuf
->db_level
);
1072 list_remove(list
, dr
);
1074 zio_nowait(dr
->dr_zio
);
1077 * Free intent log blocks up to this tx.
1079 zil_sync(os
->os_zil
, tx
);
1080 os
->os_phys
->os_zil_header
= os
->os_zil_header
;
1085 dmu_objset_is_dirty(objset_t
*os
, uint64_t txg
)
1087 return (!list_is_empty(&os
->os_dirty_dnodes
[txg
& TXG_MASK
]) ||
1088 !list_is_empty(&os
->os_free_dnodes
[txg
& TXG_MASK
]));
1091 static objset_used_cb_t
*used_cbs
[DMU_OST_NUMTYPES
];
1094 dmu_objset_register_type(dmu_objset_type_t ost
, objset_used_cb_t
*cb
)
1100 dmu_objset_userused_enabled(objset_t
*os
)
1102 return (spa_version(os
->os_spa
) >= SPA_VERSION_USERSPACE
&&
1103 used_cbs
[os
->os_phys
->os_type
] != NULL
&&
1104 DMU_USERUSED_DNODE(os
) != NULL
);
1108 do_userquota_update(objset_t
*os
, uint64_t used
, uint64_t flags
,
1109 uint64_t user
, uint64_t group
, boolean_t subtract
, dmu_tx_t
*tx
)
1111 if ((flags
& DNODE_FLAG_USERUSED_ACCOUNTED
)) {
1112 int64_t delta
= DNODE_SIZE
+ used
;
1115 VERIFY3U(0, ==, zap_increment_int(os
, DMU_USERUSED_OBJECT
,
1117 VERIFY3U(0, ==, zap_increment_int(os
, DMU_GROUPUSED_OBJECT
,
1123 dmu_objset_do_userquota_updates(objset_t
*os
, dmu_tx_t
*tx
)
1126 list_t
*list
= &os
->os_synced_dnodes
;
1128 ASSERT(list_head(list
) == NULL
|| dmu_objset_userused_enabled(os
));
1130 while ((dn
= list_head(list
))) {
1132 ASSERT(!DMU_OBJECT_IS_SPECIAL(dn
->dn_object
));
1133 ASSERT(dn
->dn_phys
->dn_type
== DMU_OT_NONE
||
1134 dn
->dn_phys
->dn_flags
&
1135 DNODE_FLAG_USERUSED_ACCOUNTED
);
1137 /* Allocate the user/groupused objects if necessary. */
1138 if (DMU_USERUSED_DNODE(os
)->dn_type
== DMU_OT_NONE
) {
1139 VERIFY(0 == zap_create_claim(os
,
1140 DMU_USERUSED_OBJECT
,
1141 DMU_OT_USERGROUP_USED
, DMU_OT_NONE
, 0, tx
));
1142 VERIFY(0 == zap_create_claim(os
,
1143 DMU_GROUPUSED_OBJECT
,
1144 DMU_OT_USERGROUP_USED
, DMU_OT_NONE
, 0, tx
));
1148 * We intentionally modify the zap object even if the
1149 * net delta is zero. Otherwise
1150 * the block of the zap obj could be shared between
1151 * datasets but need to be different between them after
1155 flags
= dn
->dn_id_flags
;
1157 if (flags
& DN_ID_OLD_EXIST
) {
1158 do_userquota_update(os
, dn
->dn_oldused
, dn
->dn_oldflags
,
1159 dn
->dn_olduid
, dn
->dn_oldgid
, B_TRUE
, tx
);
1161 if (flags
& DN_ID_NEW_EXIST
) {
1162 do_userquota_update(os
, DN_USED_BYTES(dn
->dn_phys
),
1163 dn
->dn_phys
->dn_flags
, dn
->dn_newuid
,
1164 dn
->dn_newgid
, B_FALSE
, tx
);
1167 mutex_enter(&dn
->dn_mtx
);
1169 dn
->dn_oldflags
= 0;
1170 if (dn
->dn_id_flags
& DN_ID_NEW_EXIST
) {
1171 dn
->dn_olduid
= dn
->dn_newuid
;
1172 dn
->dn_oldgid
= dn
->dn_newgid
;
1173 dn
->dn_id_flags
|= DN_ID_OLD_EXIST
;
1174 if (dn
->dn_bonuslen
== 0)
1175 dn
->dn_id_flags
|= DN_ID_CHKED_SPILL
;
1177 dn
->dn_id_flags
|= DN_ID_CHKED_BONUS
;
1179 dn
->dn_id_flags
&= ~(DN_ID_NEW_EXIST
);
1180 mutex_exit(&dn
->dn_mtx
);
1182 list_remove(list
, dn
);
1183 dnode_rele(dn
, list
);
1188 * Returns a pointer to data to find uid/gid from
1190 * If a dirty record for transaction group that is syncing can't
1191 * be found then NULL is returned. In the NULL case it is assumed
1192 * the uid/gid aren't changing.
1195 dmu_objset_userquota_find_data(dmu_buf_impl_t
*db
, dmu_tx_t
*tx
)
1197 dbuf_dirty_record_t
*dr
, **drp
;
1200 if (db
->db_dirtycnt
== 0)
1201 return (db
->db
.db_data
); /* Nothing is changing */
1203 for (drp
= &db
->db_last_dirty
; (dr
= *drp
) != NULL
; drp
= &dr
->dr_next
)
1204 if (dr
->dr_txg
== tx
->tx_txg
)
1212 DB_DNODE_ENTER(dr
->dr_dbuf
);
1213 dn
= DB_DNODE(dr
->dr_dbuf
);
1215 if (dn
->dn_bonuslen
== 0 &&
1216 dr
->dr_dbuf
->db_blkid
== DMU_SPILL_BLKID
)
1217 data
= dr
->dt
.dl
.dr_data
->b_data
;
1219 data
= dr
->dt
.dl
.dr_data
;
1221 DB_DNODE_EXIT(dr
->dr_dbuf
);
1228 dmu_objset_userquota_get_ids(dnode_t
*dn
, boolean_t before
, dmu_tx_t
*tx
)
1230 objset_t
*os
= dn
->dn_objset
;
1232 dmu_buf_impl_t
*db
= NULL
;
1233 uint64_t *user
= NULL
;
1234 uint64_t *group
= NULL
;
1235 int flags
= dn
->dn_id_flags
;
1237 boolean_t have_spill
= B_FALSE
;
1239 if (!dmu_objset_userused_enabled(dn
->dn_objset
))
1242 if (before
&& (flags
& (DN_ID_CHKED_BONUS
|DN_ID_OLD_EXIST
|
1243 DN_ID_CHKED_SPILL
)))
1246 if (before
&& dn
->dn_bonuslen
!= 0)
1247 data
= DN_BONUS(dn
->dn_phys
);
1248 else if (!before
&& dn
->dn_bonuslen
!= 0) {
1251 mutex_enter(&db
->db_mtx
);
1252 data
= dmu_objset_userquota_find_data(db
, tx
);
1254 data
= DN_BONUS(dn
->dn_phys
);
1256 } else if (dn
->dn_bonuslen
== 0 && dn
->dn_bonustype
== DMU_OT_SA
) {
1259 if (RW_WRITE_HELD(&dn
->dn_struct_rwlock
))
1260 rf
|= DB_RF_HAVESTRUCT
;
1261 error
= dmu_spill_hold_by_dnode(dn
,
1262 rf
| DB_RF_MUST_SUCCEED
,
1263 FTAG
, (dmu_buf_t
**)&db
);
1265 mutex_enter(&db
->db_mtx
);
1266 data
= (before
) ? db
->db
.db_data
:
1267 dmu_objset_userquota_find_data(db
, tx
);
1268 have_spill
= B_TRUE
;
1270 mutex_enter(&dn
->dn_mtx
);
1271 dn
->dn_id_flags
|= DN_ID_CHKED_BONUS
;
1272 mutex_exit(&dn
->dn_mtx
);
1278 user
= &dn
->dn_olduid
;
1279 group
= &dn
->dn_oldgid
;
1281 user
= &dn
->dn_newuid
;
1282 group
= &dn
->dn_newgid
;
1286 * Must always call the callback in case the object
1287 * type has changed and that type isn't an object type to track
1289 error
= used_cbs
[os
->os_phys
->os_type
](dn
->dn_bonustype
, data
,
1293 * Preserve existing uid/gid when the callback can't determine
1294 * what the new uid/gid are and the callback returned EEXIST.
1295 * The EEXIST error tells us to just use the existing uid/gid.
1296 * If we don't know what the old values are then just assign
1297 * them to 0, since that is a new file being created.
1299 if (!before
&& data
== NULL
&& error
== EEXIST
) {
1300 if (flags
& DN_ID_OLD_EXIST
) {
1301 dn
->dn_newuid
= dn
->dn_olduid
;
1302 dn
->dn_newgid
= dn
->dn_oldgid
;
1311 mutex_exit(&db
->db_mtx
);
1313 mutex_enter(&dn
->dn_mtx
);
1314 if (error
== 0 && before
)
1315 dn
->dn_id_flags
|= DN_ID_OLD_EXIST
;
1316 if (error
== 0 && !before
)
1317 dn
->dn_id_flags
|= DN_ID_NEW_EXIST
;
1320 dn
->dn_id_flags
|= DN_ID_CHKED_SPILL
;
1322 dn
->dn_id_flags
|= DN_ID_CHKED_BONUS
;
1324 mutex_exit(&dn
->dn_mtx
);
1326 dmu_buf_rele((dmu_buf_t
*)db
, FTAG
);
1330 dmu_objset_userspace_present(objset_t
*os
)
1332 return (os
->os_phys
->os_flags
&
1333 OBJSET_FLAG_USERACCOUNTING_COMPLETE
);
/*
 * Bring an objset that predates user/group accounting up to date by
 * dirtying every object so the accounting is computed at sync time.
 * Returns 0 on success, ENOTSUP if accounting is not enabled, EINVAL
 * for snapshots, or EINTR if interrupted by a signal.
 */
int
dmu_objset_userspace_upgrade(objset_t *os)
{
	uint64_t obj;
	int err = 0;

	/* Nothing to do if the accounting has already been completed. */
	if (dmu_objset_userspace_present(os))
		return (0);
	if (!dmu_objset_userused_enabled(os))
		return (SET_ERROR(ENOTSUP));
	if (dmu_objset_is_snapshot(os))
		return (SET_ERROR(EINVAL));

	/*
	 * We simply need to mark every object dirty, so that it will be
	 * synced out and now accounted.  If this is called
	 * concurrently, or if we already did some work before crashing,
	 * that's fine, since we track each object's accounted state
	 * independently.
	 */

	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
		dmu_tx_t *tx;
		dmu_buf_t *db;
		int objerr;

		/* Allow the (potentially long) walk to be interrupted. */
		if (issig(JUSTLOOKING) && issig(FORREAL))
			return (SET_ERROR(EINTR));

		objerr = dmu_bonus_hold(os, obj, FTAG, &db);
		if (objerr != 0)
			continue;
		tx = dmu_tx_create(os);
		dmu_tx_hold_bonus(tx, obj);
		objerr = dmu_tx_assign(tx, TXG_WAIT);
		if (objerr != 0) {
			dmu_tx_abort(tx);
			continue;
		}
		/* Dirtying the bonus buffer forces re-accounting at sync. */
		dmu_buf_will_dirty(db, tx);
		dmu_buf_rele(db, FTAG);
		dmu_tx_commit(tx);
	}

	os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
	/* Wait for the dirtied state (and the flag) to reach disk. */
	txg_wait_synced(dmu_objset_pool(os), 0);
	return (0);
}
/*
 * Return space usage statistics for this objset by delegating to its
 * backing DSL dataset: bytes referenced, bytes available, objects
 * used, and objects available.
 */
void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
    uint64_t *usedobjsp, uint64_t *availobjsp)
{
	dsl_dataset_space(os->os_dsl_dataset, refdbytesp, availbytesp,
	    usedobjsp, availobjsp);
}
/* Return the FSID guid of this objset's backing DSL dataset. */
uint64_t
dmu_objset_fsid_guid(objset_t *os)
{
	return (dsl_dataset_fsid_guid(os->os_dsl_dataset));
}
1400 dmu_objset_fast_stat(objset_t
*os
, dmu_objset_stats_t
*stat
)
1402 stat
->dds_type
= os
->os_phys
->os_type
;
1403 if (os
->os_dsl_dataset
)
1404 dsl_dataset_fast_stat(os
->os_dsl_dataset
, stat
);
1408 dmu_objset_stats(objset_t
*os
, nvlist_t
*nv
)
1410 ASSERT(os
->os_dsl_dataset
||
1411 os
->os_phys
->os_type
== DMU_OST_META
);
1413 if (os
->os_dsl_dataset
!= NULL
)
1414 dsl_dataset_stats(os
->os_dsl_dataset
, nv
);
1416 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_TYPE
,
1417 os
->os_phys
->os_type
);
1418 dsl_prop_nvlist_add_uint64(nv
, ZFS_PROP_USERACCOUNTING
,
1419 dmu_objset_userspace_present(os
));
1423 dmu_objset_is_snapshot(objset_t
*os
)
1425 if (os
->os_dsl_dataset
!= NULL
)
1426 return (dsl_dataset_is_snapshot(os
->os_dsl_dataset
));
/*
 * Given a (possibly case-folded) snapshot name, find the real on-disk
 * name via a normalizing ZAP lookup.  At most 'maxlen' bytes are
 * copied into 'real', and *conflict is set if the normalized name
 * matches more than one snapshot.
 */
int
dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
    boolean_t *conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	uint64_t ignored;

	/* A dataset with no snapshot-names ZAP has no snapshots at all. */
	if (ds->ds_phys->ds_snapnames_zapobj == 0)
		return (SET_ERROR(ENOENT));

	return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, name, 8, 1, &ignored, MT_FIRST,
	    real, maxlen, conflict));
}
/*
 * Iterate over this dataset's snapshots.  *offp is a serialized ZAP
 * cursor position (start at 0); on success the next snapshot's name
 * is copied into 'name' (a buffer of 'namelen' bytes), *idp gets its
 * object number, *case_conflict reports a normalization conflict, and
 * *offp is advanced past the returned entry.  Returns ENOENT when the
 * iteration is exhausted, ENAMETOOLONG if the name does not fit.
 */
int
dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp, boolean_t *case_conflict)
{
	dsl_dataset_t *ds = os->os_dsl_dataset;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	ASSERT(dsl_pool_config_held(dmu_objset_pool(os)));

	/* No snapshot-names ZAP means no snapshots to list. */
	if (ds->ds_phys->ds_snapnames_zapobj == 0)
		return (SET_ERROR(ENOENT));

	zap_cursor_init_serialized(&cursor,
	    ds->ds_dir->dd_pool->dp_meta_objset,
	    ds->ds_phys->ds_snapnames_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENOENT));
	}

	/* +1 accounts for the terminating NUL. */
	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENAMETOOLONG));
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	if (case_conflict)
		*case_conflict = attr.za_normalization_conflict;
	/* Advance, then serialize so the caller can resume after us. */
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}
/*
 * Look up the object number of the snapshot named 'name' in this
 * objset's dataset; stores the result in *value.
 */
int
dmu_snapshot_lookup(objset_t *os, const char *name, uint64_t *value)
{
	return (dsl_dataset_snap_lookup(os->os_dsl_dataset, name, value));
}
/*
 * Iterate over the child datasets of this objset's DSL directory,
 * using the same serialized-cursor protocol as
 * dmu_snapshot_list_next(): *offp is the resume position, and on
 * success the child's name, object number, and the advanced cursor
 * are returned.  Returns ENOENT on a snapshot (no children) or when
 * the iteration is exhausted, ENAMETOOLONG if the name does not fit.
 */
int
dmu_dir_list_next(objset_t *os, int namelen, char *name,
    uint64_t *idp, uint64_t *offp)
{
	dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	/* there is no next dir on a snapshot! */
	if (os->os_dsl_dataset->ds_object !=
	    dd->dd_phys->dd_head_dataset_obj)
		return (SET_ERROR(ENOENT));

	zap_cursor_init_serialized(&cursor,
	    dd->dd_pool->dp_meta_objset,
	    dd->dd_phys->dd_child_dir_zapobj, *offp);

	if (zap_cursor_retrieve(&cursor, &attr) != 0) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENOENT));
	}

	/* +1 accounts for the terminating NUL. */
	if (strlen(attr.za_name) + 1 > namelen) {
		zap_cursor_fini(&cursor);
		return (SET_ERROR(ENAMETOOLONG));
	}

	(void) strcpy(name, attr.za_name);
	if (idp)
		*idp = attr.za_first_integer;
	/* Advance, then serialize so the caller can resume after us. */
	zap_cursor_advance(&cursor);
	*offp = zap_cursor_serialize(&cursor);
	zap_cursor_fini(&cursor);

	return (0);
}
/*
 * Find objsets under and including ddobj, call func(ds) on each.
 * Recurses into children when DS_FIND_CHILDREN is set and visits
 * snapshots when DS_FIND_SNAPSHOTS is set; the callback runs with the
 * pool config lock held.  Hidden ($MOS, $ORIGIN) objsets are skipped.
 */
int
dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	uint64_t thisobj;
	int err;

	ASSERT(dsl_pool_config_held(dp));

	err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
	if (err != 0)
		return (err);

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_rele(dd, FTAG);
		return (0);
	}

	thisobj = dd->dd_phys->dd_head_dataset_obj;
	/* Heap-allocate: zap_attribute_t is too large for kernel stacks. */
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE);

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT3U(attr->za_integer_length, ==,
			    sizeof (uint64_t));
			ASSERT3U(attr->za_num_integers, ==, 1);

			/* Recurse on each child directory's ddobj. */
			err = dmu_objset_find_dp(dp, attr->za_first_integer,
			    func, arg, flags);
			if (err != 0)
				break;
		}
		zap_cursor_fini(&zc);

		if (err != 0) {
			dsl_dir_rele(dd, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);

		if (err == 0) {
			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT3U(attr->za_integer_length, ==,
				    sizeof (uint64_t));
				ASSERT3U(attr->za_num_integers, ==, 1);

				err = dsl_dataset_hold_obj(dp,
				    attr->za_first_integer, FTAG, &ds);
				if (err != 0)
					break;
				err = func(dp, ds, arg);
				dsl_dataset_rele(ds, FTAG);
				if (err != 0)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_rele(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));

	if (err != 0)
		return (err);

	/*
	 * Apply to self.
	 */
	err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
	if (err != 0)
		return (err);
	err = func(dp, ds, arg);
	dsl_dataset_rele(ds, FTAG);
	return (err);
}
/*
 * Find all objsets under name, and for each, call 'func(child_name, arg)'.
 * The dp_config_rwlock must not be held when this is called, and it
 * will not be held when the callback is called.
 * Therefore this function should only be used when the pool is not changing
 * (e.g. in syncing context), or the callback can deal with the possible races.
 */
static int
dmu_objset_find_impl(spa_t *spa, const char *name,
    int func(const char *, void *), void *arg, int flags)
{
	dsl_dir_t *dd;
	dsl_pool_t *dp = spa_get_dsl(spa);
	dsl_dataset_t *ds;
	zap_cursor_t zc;
	zap_attribute_t *attr;
	char *child;
	uint64_t thisobj;
	int err;

	dsl_pool_config_enter(dp, FTAG);

	err = dsl_dir_hold(dp, name, FTAG, &dd, NULL);
	if (err != 0) {
		dsl_pool_config_exit(dp, FTAG);
		return (err);
	}

	/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
	if (dd->dd_myname[0] == '$') {
		dsl_dir_rele(dd, FTAG);
		dsl_pool_config_exit(dp, FTAG);
		return (0);
	}

	thisobj = dd->dd_phys->dd_head_dataset_obj;
	/* Heap-allocate: zap_attribute_t is too large for kernel stacks. */
	attr = kmem_alloc(sizeof (zap_attribute_t), KM_PUSHPAGE);

	/*
	 * Iterate over all children.
	 */
	if (flags & DS_FIND_CHILDREN) {
		for (zap_cursor_init(&zc, dp->dp_meta_objset,
		    dd->dd_phys->dd_child_dir_zapobj);
		    zap_cursor_retrieve(&zc, attr) == 0;
		    (void) zap_cursor_advance(&zc)) {
			ASSERT3U(attr->za_integer_length, ==,
			    sizeof (uint64_t));
			ASSERT3U(attr->za_num_integers, ==, 1);

			child = kmem_asprintf("%s/%s", name, attr->za_name);
			/*
			 * Drop the config lock around the recursion so the
			 * callback never sees it held (see comment above).
			 */
			dsl_pool_config_exit(dp, FTAG);
			err = dmu_objset_find_impl(spa, child,
			    func, arg, flags);
			dsl_pool_config_enter(dp, FTAG);
			strfree(child);
			if (err != 0)
				break;
		}
		zap_cursor_fini(&zc);

		if (err != 0) {
			dsl_dir_rele(dd, FTAG);
			dsl_pool_config_exit(dp, FTAG);
			kmem_free(attr, sizeof (zap_attribute_t));
			return (err);
		}
	}

	/*
	 * Iterate over all snapshots.
	 */
	if (flags & DS_FIND_SNAPSHOTS) {
		err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);

		if (err == 0) {
			uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
			dsl_dataset_rele(ds, FTAG);

			for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
			    zap_cursor_retrieve(&zc, attr) == 0;
			    (void) zap_cursor_advance(&zc)) {
				ASSERT3U(attr->za_integer_length, ==,
				    sizeof (uint64_t));
				ASSERT3U(attr->za_num_integers, ==, 1);

				child = kmem_asprintf("%s@%s",
				    name, attr->za_name);
				/* Callback runs without the config lock. */
				dsl_pool_config_exit(dp, FTAG);
				err = func(child, arg);
				dsl_pool_config_enter(dp, FTAG);
				strfree(child);
				if (err != 0)
					break;
			}
			zap_cursor_fini(&zc);
		}
	}

	dsl_dir_rele(dd, FTAG);
	kmem_free(attr, sizeof (zap_attribute_t));
	dsl_pool_config_exit(dp, FTAG);

	if (err != 0)
		return (err);

	/* Apply to self. */
	return (func(name, arg));
}
/*
 * See comment above dmu_objset_find_impl().
 */
int
dmu_objset_find(char *name, int func(const char *, void *), void *arg,
    int flags)
{
	spa_t *spa;
	int error;

	/* Hold the pool for the duration of the traversal. */
	error = spa_open(name, &spa, FTAG);
	if (error != 0)
		return (error);
	error = dmu_objset_find_impl(spa, name, func, arg, flags);
	spa_close(spa, FTAG);
	return (error);
}
/*
 * Attach an opaque consumer pointer to this objset.  Caller must hold
 * os_user_ptr_lock across the set and any paired get.
 */
void
dmu_objset_set_user(objset_t *os, void *user_ptr)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	os->os_user_ptr = user_ptr;
}
/*
 * Return the opaque consumer pointer previously attached with
 * dmu_objset_set_user().  Caller must hold os_user_ptr_lock.
 */
void *
dmu_objset_get_user(objset_t *os)
{
	ASSERT(MUTEX_HELD(&os->os_user_ptr_lock));
	return (os->os_user_ptr);
}
1774 * Determine name of filesystem, given name of snapshot.
1775 * buf must be at least MAXNAMELEN bytes
1778 dmu_fsname(const char *snapname
, char *buf
)
1780 char *atp
= strchr(snapname
, '@');
1782 return (SET_ERROR(EINVAL
));
1783 if (atp
- snapname
>= MAXNAMELEN
)
1784 return (SET_ERROR(ENAMETOOLONG
));
1785 (void) strlcpy(buf
, snapname
, atp
- snapname
+ 1);
1789 #if defined(_KERNEL) && defined(HAVE_SPL)
1790 EXPORT_SYMBOL(dmu_objset_zil
);
1791 EXPORT_SYMBOL(dmu_objset_pool
);
1792 EXPORT_SYMBOL(dmu_objset_ds
);
1793 EXPORT_SYMBOL(dmu_objset_type
);
1794 EXPORT_SYMBOL(dmu_objset_name
);
1795 EXPORT_SYMBOL(dmu_objset_hold
);
1796 EXPORT_SYMBOL(dmu_objset_own
);
1797 EXPORT_SYMBOL(dmu_objset_rele
);
1798 EXPORT_SYMBOL(dmu_objset_disown
);
1799 EXPORT_SYMBOL(dmu_objset_from_ds
);
1800 EXPORT_SYMBOL(dmu_objset_create
);
1801 EXPORT_SYMBOL(dmu_objset_clone
);
1802 EXPORT_SYMBOL(dmu_objset_stats
);
1803 EXPORT_SYMBOL(dmu_objset_fast_stat
);
1804 EXPORT_SYMBOL(dmu_objset_spa
);
1805 EXPORT_SYMBOL(dmu_objset_space
);
1806 EXPORT_SYMBOL(dmu_objset_fsid_guid
);
1807 EXPORT_SYMBOL(dmu_objset_find
);
1808 EXPORT_SYMBOL(dmu_objset_byteswap
);
1809 EXPORT_SYMBOL(dmu_objset_evict_dbufs
);
1810 EXPORT_SYMBOL(dmu_objset_snap_cmtime
);
1812 EXPORT_SYMBOL(dmu_objset_sync
);
1813 EXPORT_SYMBOL(dmu_objset_is_dirty
);
1814 EXPORT_SYMBOL(dmu_objset_create_impl
);
1815 EXPORT_SYMBOL(dmu_objset_open_impl
);
1816 EXPORT_SYMBOL(dmu_objset_evict
);
1817 EXPORT_SYMBOL(dmu_objset_register_type
);
1818 EXPORT_SYMBOL(dmu_objset_do_userquota_updates
);
1819 EXPORT_SYMBOL(dmu_objset_userquota_get_ids
);
1820 EXPORT_SYMBOL(dmu_objset_userused_enabled
);
1821 EXPORT_SYMBOL(dmu_objset_userspace_upgrade
);
1822 EXPORT_SYMBOL(dmu_objset_userspace_present
);