4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
28 #include <sys/zfs_context.h>
30 #include <sys/refcount.h>
31 #include <sys/zap_impl.h>
32 #include <sys/zap_leaf.h>
36 #include <sys/sunddi.h>
39 static int mzap_upgrade(zap_t
**zapp
, dmu_tx_t
*tx
);
43 zap_hash(zap_t
*zap
, const char *normname
)
47 uint64_t crc
= zap
->zap_salt
;
49 /* NB: name must already be normalized, if necessary */
52 ASSERT(zfs_crc64_table
[128] == ZFS_CRC64_POLY
);
53 for (cp
= (const uint8_t *)normname
; (c
= *cp
) != '\0'; cp
++) {
54 crc
= (crc
>> 8) ^ zfs_crc64_table
[(crc
^ c
) & 0xFF];
58 * Only use 28 bits, since we need 4 bits in the cookie for the
59 * collision differentiator. We MUST use the high bits, since
60 * those are the ones that we first pay attention to when
63 crc
&= ~((1ULL << (64 - ZAP_HASHBITS
)) - 1);
69 zap_normalize(zap_t
*zap
, const char *name
, char *namenorm
)
74 inlen
= strlen(name
) + 1;
75 outlen
= ZAP_MAXNAMELEN
;
78 (void) u8_textprep_str((char *)name
, &inlen
, namenorm
, &outlen
,
79 zap
->zap_normflags
| U8_TEXTPREP_IGNORE_NULL
|
80 U8_TEXTPREP_IGNORE_INVALID
, U8_UNICODE_LATEST
, &err
);
86 zap_match(zap_name_t
*zn
, const char *matchname
)
88 if (zn
->zn_matchtype
== MT_FIRST
) {
89 char norm
[ZAP_MAXNAMELEN
];
91 if (zap_normalize(zn
->zn_zap
, matchname
, norm
) != 0)
94 return (strcmp(zn
->zn_name_norm
, norm
) == 0);
96 /* MT_BEST or MT_EXACT */
97 return (strcmp(zn
->zn_name_orij
, matchname
) == 0);
102 zap_name_free(zap_name_t
*zn
)
104 kmem_free(zn
, sizeof (zap_name_t
));
107 /* XXX combine this with zap_lockdir()? */
109 zap_name_alloc(zap_t
*zap
, const char *name
, matchtype_t mt
)
111 zap_name_t
*zn
= kmem_alloc(sizeof (zap_name_t
), KM_SLEEP
);
114 zn
->zn_name_orij
= name
;
115 zn
->zn_matchtype
= mt
;
116 if (zap
->zap_normflags
) {
117 if (zap_normalize(zap
, name
, zn
->zn_normbuf
) != 0) {
121 zn
->zn_name_norm
= zn
->zn_normbuf
;
123 if (mt
!= MT_EXACT
) {
127 zn
->zn_name_norm
= zn
->zn_name_orij
;
130 zn
->zn_hash
= zap_hash(zap
, zn
->zn_name_norm
);
135 mzap_byteswap(mzap_phys_t
*buf
, size_t size
)
138 buf
->mz_block_type
= BSWAP_64(buf
->mz_block_type
);
139 buf
->mz_salt
= BSWAP_64(buf
->mz_salt
);
140 buf
->mz_normflags
= BSWAP_64(buf
->mz_normflags
);
141 max
= (size
/ MZAP_ENT_LEN
) - 1;
142 for (i
= 0; i
< max
; i
++) {
143 buf
->mz_chunk
[i
].mze_value
=
144 BSWAP_64(buf
->mz_chunk
[i
].mze_value
);
145 buf
->mz_chunk
[i
].mze_cd
=
146 BSWAP_32(buf
->mz_chunk
[i
].mze_cd
);
151 zap_byteswap(void *buf
, size_t size
)
155 block_type
= *(uint64_t *)buf
;
157 if (block_type
== ZBT_MICRO
|| block_type
== BSWAP_64(ZBT_MICRO
)) {
158 /* ASSERT(magic == ZAP_LEAF_MAGIC); */
159 mzap_byteswap(buf
, size
);
161 fzap_byteswap(buf
, size
);
166 mze_compare(const void *arg1
, const void *arg2
)
168 const mzap_ent_t
*mze1
= arg1
;
169 const mzap_ent_t
*mze2
= arg2
;
171 if (mze1
->mze_hash
> mze2
->mze_hash
)
173 if (mze1
->mze_hash
< mze2
->mze_hash
)
175 if (mze1
->mze_phys
.mze_cd
> mze2
->mze_phys
.mze_cd
)
177 if (mze1
->mze_phys
.mze_cd
< mze2
->mze_phys
.mze_cd
)
183 mze_insert(zap_t
*zap
, int chunkid
, uint64_t hash
, mzap_ent_phys_t
*mzep
)
187 ASSERT(zap
->zap_ismicro
);
188 ASSERT(RW_WRITE_HELD(&zap
->zap_rwlock
));
189 ASSERT(mzep
->mze_cd
< ZAP_MAXCD
);
191 mze
= kmem_alloc(sizeof (mzap_ent_t
), KM_SLEEP
);
192 mze
->mze_chunkid
= chunkid
;
193 mze
->mze_hash
= hash
;
194 mze
->mze_phys
= *mzep
;
195 avl_add(&zap
->zap_m
.zap_avl
, mze
);
199 mze_find(zap_name_t
*zn
)
201 mzap_ent_t mze_tofind
;
204 avl_tree_t
*avl
= &zn
->zn_zap
->zap_m
.zap_avl
;
206 ASSERT(zn
->zn_zap
->zap_ismicro
);
207 ASSERT(RW_LOCK_HELD(&zn
->zn_zap
->zap_rwlock
));
209 if (strlen(zn
->zn_name_norm
) >= sizeof (mze_tofind
.mze_phys
.mze_name
))
212 mze_tofind
.mze_hash
= zn
->zn_hash
;
213 mze_tofind
.mze_phys
.mze_cd
= 0;
216 mze
= avl_find(avl
, &mze_tofind
, &idx
);
218 mze
= avl_nearest(avl
, idx
, AVL_AFTER
);
219 for (; mze
&& mze
->mze_hash
== zn
->zn_hash
; mze
= AVL_NEXT(avl
, mze
)) {
220 if (zap_match(zn
, mze
->mze_phys
.mze_name
))
223 if (zn
->zn_matchtype
== MT_BEST
) {
224 zn
->zn_matchtype
= MT_FIRST
;
231 mze_find_unused_cd(zap_t
*zap
, uint64_t hash
)
233 mzap_ent_t mze_tofind
;
236 avl_tree_t
*avl
= &zap
->zap_m
.zap_avl
;
239 ASSERT(zap
->zap_ismicro
);
240 ASSERT(RW_LOCK_HELD(&zap
->zap_rwlock
));
242 mze_tofind
.mze_hash
= hash
;
243 mze_tofind
.mze_phys
.mze_cd
= 0;
246 for (mze
= avl_find(avl
, &mze_tofind
, &idx
);
247 mze
&& mze
->mze_hash
== hash
; mze
= AVL_NEXT(avl
, mze
)) {
248 if (mze
->mze_phys
.mze_cd
!= cd
)
257 mze_remove(zap_t
*zap
, mzap_ent_t
*mze
)
259 ASSERT(zap
->zap_ismicro
);
260 ASSERT(RW_WRITE_HELD(&zap
->zap_rwlock
));
262 avl_remove(&zap
->zap_m
.zap_avl
, mze
);
263 kmem_free(mze
, sizeof (mzap_ent_t
));
267 mze_destroy(zap_t
*zap
)
270 void *avlcookie
= NULL
;
272 while (mze
= avl_destroy_nodes(&zap
->zap_m
.zap_avl
, &avlcookie
))
273 kmem_free(mze
, sizeof (mzap_ent_t
));
274 avl_destroy(&zap
->zap_m
.zap_avl
);
278 mzap_open(objset_t
*os
, uint64_t obj
, dmu_buf_t
*db
)
284 ASSERT3U(MZAP_ENT_LEN
, ==, sizeof (mzap_ent_phys_t
));
286 zap
= kmem_zalloc(sizeof (zap_t
), KM_SLEEP
);
287 rw_init(&zap
->zap_rwlock
, 0, 0, 0);
288 rw_enter(&zap
->zap_rwlock
, RW_WRITER
);
289 zap
->zap_objset
= os
;
290 zap
->zap_object
= obj
;
293 if (*(uint64_t *)db
->db_data
!= ZBT_MICRO
) {
294 mutex_init(&zap
->zap_f
.zap_num_entries_mtx
, 0, 0, 0);
295 zap
->zap_f
.zap_block_shift
= highbit(db
->db_size
) - 1;
297 zap
->zap_ismicro
= TRUE
;
301 * Make sure that zap_ismicro is set before we let others see
302 * it, because zap_lockdir() checks zap_ismicro without the lock
305 winner
= dmu_buf_set_user(db
, zap
, &zap
->zap_m
.zap_phys
, zap_evict
);
307 if (winner
!= NULL
) {
308 rw_exit(&zap
->zap_rwlock
);
309 rw_destroy(&zap
->zap_rwlock
);
310 if (!zap
->zap_ismicro
)
311 mutex_destroy(&zap
->zap_f
.zap_num_entries_mtx
);
312 kmem_free(zap
, sizeof (zap_t
));
316 if (zap
->zap_ismicro
) {
317 zap
->zap_salt
= zap
->zap_m
.zap_phys
->mz_salt
;
318 zap
->zap_normflags
= zap
->zap_m
.zap_phys
->mz_normflags
;
319 zap
->zap_m
.zap_num_chunks
= db
->db_size
/ MZAP_ENT_LEN
- 1;
320 avl_create(&zap
->zap_m
.zap_avl
, mze_compare
,
321 sizeof (mzap_ent_t
), offsetof(mzap_ent_t
, mze_node
));
323 for (i
= 0; i
< zap
->zap_m
.zap_num_chunks
; i
++) {
324 mzap_ent_phys_t
*mze
=
325 &zap
->zap_m
.zap_phys
->mz_chunk
[i
];
326 if (mze
->mze_name
[0]) {
329 zap
->zap_m
.zap_num_entries
++;
330 zn
= zap_name_alloc(zap
, mze
->mze_name
,
332 mze_insert(zap
, i
, zn
->zn_hash
, mze
);
337 zap
->zap_salt
= zap
->zap_f
.zap_phys
->zap_salt
;
338 zap
->zap_normflags
= zap
->zap_f
.zap_phys
->zap_normflags
;
340 ASSERT3U(sizeof (struct zap_leaf_header
), ==,
341 2*ZAP_LEAF_CHUNKSIZE
);
344 * The embedded pointer table should not overlap the
347 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap
, 0), >,
348 &zap
->zap_f
.zap_phys
->zap_salt
);
351 * The embedded pointer table should end at the end of
354 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap
,
355 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap
)) -
356 (uintptr_t)zap
->zap_f
.zap_phys
, ==,
357 zap
->zap_dbuf
->db_size
);
359 rw_exit(&zap
->zap_rwlock
);
364 zap_lockdir(objset_t
*os
, uint64_t obj
, dmu_tx_t
*tx
,
365 krw_t lti
, boolean_t fatreader
, boolean_t adding
, zap_t
**zapp
)
374 err
= dmu_buf_hold(os
, obj
, 0, NULL
, &db
);
380 dmu_object_info_t doi
;
381 dmu_object_info_from_db(db
, &doi
);
382 ASSERT(dmu_ot
[doi
.doi_type
].ot_byteswap
== zap_byteswap
);
386 zap
= dmu_buf_get_user(db
);
388 zap
= mzap_open(os
, obj
, db
);
391 * We're checking zap_ismicro without the lock held, in order to
392 * tell what type of lock we want. Once we have some sort of
393 * lock, see if it really is the right type. In practice this
394 * can only be different if it was upgraded from micro to fat,
395 * and micro wanted WRITER but fat only needs READER.
397 lt
= (!zap
->zap_ismicro
&& fatreader
) ? RW_READER
: lti
;
398 rw_enter(&zap
->zap_rwlock
, lt
);
399 if (lt
!= ((!zap
->zap_ismicro
&& fatreader
) ? RW_READER
: lti
)) {
400 /* it was upgraded, now we only need reader */
401 ASSERT(lt
== RW_WRITER
);
403 (!zap
->zap_ismicro
&& fatreader
) ? RW_READER
: lti
);
404 rw_downgrade(&zap
->zap_rwlock
);
408 zap
->zap_objset
= os
;
411 dmu_buf_will_dirty(db
, tx
);
413 ASSERT3P(zap
->zap_dbuf
, ==, db
);
415 ASSERT(!zap
->zap_ismicro
||
416 zap
->zap_m
.zap_num_entries
<= zap
->zap_m
.zap_num_chunks
);
417 if (zap
->zap_ismicro
&& tx
&& adding
&&
418 zap
->zap_m
.zap_num_entries
== zap
->zap_m
.zap_num_chunks
) {
419 uint64_t newsz
= db
->db_size
+ SPA_MINBLOCKSIZE
;
420 if (newsz
> MZAP_MAX_BLKSZ
) {
421 dprintf("upgrading obj %llu: num_entries=%u\n",
422 obj
, zap
->zap_m
.zap_num_entries
);
424 return (mzap_upgrade(zapp
, tx
));
426 err
= dmu_object_set_blocksize(os
, obj
, newsz
, 0, tx
);
427 ASSERT3U(err
, ==, 0);
428 zap
->zap_m
.zap_num_chunks
=
429 db
->db_size
/ MZAP_ENT_LEN
- 1;
437 zap_unlockdir(zap_t
*zap
)
439 rw_exit(&zap
->zap_rwlock
);
440 dmu_buf_rele(zap
->zap_dbuf
, NULL
);
444 mzap_upgrade(zap_t
**zapp
, dmu_tx_t
*tx
)
447 int i
, sz
, nchunks
, err
;
450 ASSERT(RW_WRITE_HELD(&zap
->zap_rwlock
));
452 sz
= zap
->zap_dbuf
->db_size
;
453 mzp
= kmem_alloc(sz
, KM_SLEEP
);
454 bcopy(zap
->zap_dbuf
->db_data
, mzp
, sz
);
455 nchunks
= zap
->zap_m
.zap_num_chunks
;
457 err
= dmu_object_set_blocksize(zap
->zap_objset
, zap
->zap_object
,
458 1ULL << fzap_default_block_shift
, 0, tx
);
464 dprintf("upgrading obj=%llu with %u chunks\n",
465 zap
->zap_object
, nchunks
);
466 /* XXX destroy the avl later, so we can use the stored hash value */
469 fzap_upgrade(zap
, tx
);
471 for (i
= 0; i
< nchunks
; i
++) {
473 mzap_ent_phys_t
*mze
= &mzp
->mz_chunk
[i
];
475 if (mze
->mze_name
[0] == 0)
477 dprintf("adding %s=%llu\n",
478 mze
->mze_name
, mze
->mze_value
);
479 zn
= zap_name_alloc(zap
, mze
->mze_name
, MT_EXACT
);
480 err
= fzap_add_cd(zn
, 8, 1, &mze
->mze_value
, mze
->mze_cd
, tx
);
481 zap
= zn
->zn_zap
; /* fzap_add_cd() may change zap */
492 mzap_create_impl(objset_t
*os
, uint64_t obj
, int normflags
, dmu_tx_t
*tx
)
497 VERIFY(0 == dmu_buf_hold(os
, obj
, 0, FTAG
, &db
));
501 dmu_object_info_t doi
;
502 dmu_object_info_from_db(db
, &doi
);
503 ASSERT(dmu_ot
[doi
.doi_type
].ot_byteswap
== zap_byteswap
);
507 dmu_buf_will_dirty(db
, tx
);
509 zp
->mz_block_type
= ZBT_MICRO
;
510 zp
->mz_salt
= ((uintptr_t)db
^ (uintptr_t)tx
^ (obj
<< 1)) | 1ULL;
511 zp
->mz_normflags
= normflags
;
512 dmu_buf_rele(db
, FTAG
);
516 zap_create_claim(objset_t
*os
, uint64_t obj
, dmu_object_type_t ot
,
517 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
519 return (zap_create_claim_norm(os
, obj
,
520 0, ot
, bonustype
, bonuslen
, tx
));
524 zap_create_claim_norm(objset_t
*os
, uint64_t obj
, int normflags
,
525 dmu_object_type_t ot
,
526 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
530 err
= dmu_object_claim(os
, obj
, ot
, 0, bonustype
, bonuslen
, tx
);
533 mzap_create_impl(os
, obj
, normflags
, tx
);
538 zap_create(objset_t
*os
, dmu_object_type_t ot
,
539 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
541 return (zap_create_norm(os
, 0, ot
, bonustype
, bonuslen
, tx
));
545 zap_create_norm(objset_t
*os
, int normflags
, dmu_object_type_t ot
,
546 dmu_object_type_t bonustype
, int bonuslen
, dmu_tx_t
*tx
)
548 uint64_t obj
= dmu_object_alloc(os
, ot
, 0, bonustype
, bonuslen
, tx
);
550 mzap_create_impl(os
, obj
, normflags
, tx
);
555 zap_destroy(objset_t
*os
, uint64_t zapobj
, dmu_tx_t
*tx
)
558 * dmu_object_free will free the object number and free the
559 * data. Freeing the data will cause our pageout function to be
560 * called, which will destroy our data (zap_leaf_t's and zap_t).
563 return (dmu_object_free(os
, zapobj
, tx
));
568 zap_evict(dmu_buf_t
*db
, void *vzap
)
572 rw_destroy(&zap
->zap_rwlock
);
574 if (zap
->zap_ismicro
)
577 mutex_destroy(&zap
->zap_f
.zap_num_entries_mtx
);
579 kmem_free(zap
, sizeof (zap_t
));
583 zap_count(objset_t
*os
, uint64_t zapobj
, uint64_t *count
)
588 err
= zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, &zap
);
591 if (!zap
->zap_ismicro
) {
592 err
= fzap_count(zap
, count
);
594 *count
= zap
->zap_m
.zap_num_entries
;
601 * zn may be NULL; if not specified, it will be computed if needed.
602 * See also the comment above zap_entry_normalization_conflict().
605 mzap_normalization_conflict(zap_t
*zap
, zap_name_t
*zn
, mzap_ent_t
*mze
)
608 int direction
= AVL_BEFORE
;
609 boolean_t allocdzn
= B_FALSE
;
611 if (zap
->zap_normflags
== 0)
615 for (other
= avl_walk(&zap
->zap_m
.zap_avl
, mze
, direction
);
616 other
&& other
->mze_hash
== mze
->mze_hash
;
617 other
= avl_walk(&zap
->zap_m
.zap_avl
, other
, direction
)) {
620 zn
= zap_name_alloc(zap
, mze
->mze_phys
.mze_name
,
624 if (zap_match(zn
, other
->mze_phys
.mze_name
)) {
631 if (direction
== AVL_BEFORE
) {
632 direction
= AVL_AFTER
;
642 * Routines for manipulating attributes.
646 zap_lookup(objset_t
*os
, uint64_t zapobj
, const char *name
,
647 uint64_t integer_size
, uint64_t num_integers
, void *buf
)
649 return (zap_lookup_norm(os
, zapobj
, name
, integer_size
,
650 num_integers
, buf
, MT_EXACT
, NULL
, 0, NULL
));
654 zap_lookup_norm(objset_t
*os
, uint64_t zapobj
, const char *name
,
655 uint64_t integer_size
, uint64_t num_integers
, void *buf
,
656 matchtype_t mt
, char *realname
, int rn_len
,
664 err
= zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, &zap
);
667 zn
= zap_name_alloc(zap
, name
, mt
);
673 if (!zap
->zap_ismicro
) {
674 err
= fzap_lookup(zn
, integer_size
, num_integers
, buf
,
675 realname
, rn_len
, ncp
);
681 if (num_integers
< 1) {
683 } else if (integer_size
!= 8) {
686 *(uint64_t *)buf
= mze
->mze_phys
.mze_value
;
687 (void) strlcpy(realname
,
688 mze
->mze_phys
.mze_name
, rn_len
);
690 *ncp
= mzap_normalization_conflict(zap
,
702 zap_length(objset_t
*os
, uint64_t zapobj
, const char *name
,
703 uint64_t *integer_size
, uint64_t *num_integers
)
710 err
= zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, &zap
);
713 zn
= zap_name_alloc(zap
, name
, MT_EXACT
);
718 if (!zap
->zap_ismicro
) {
719 err
= fzap_length(zn
, integer_size
, num_integers
);
737 mzap_addent(zap_name_t
*zn
, uint64_t value
)
740 zap_t
*zap
= zn
->zn_zap
;
741 int start
= zap
->zap_m
.zap_alloc_next
;
744 dprintf("obj=%llu %s=%llu\n", zap
->zap_object
,
745 zn
->zn_name_orij
, value
);
746 ASSERT(RW_WRITE_HELD(&zap
->zap_rwlock
));
749 for (i
= 0; i
< zap
->zap_m
.zap_num_chunks
; i
++) {
750 mzap_ent_phys_t
*mze
= &zap
->zap_m
.zap_phys
->mz_chunk
[i
];
751 ASSERT(strcmp(zn
->zn_name_orij
, mze
->mze_name
) != 0);
755 cd
= mze_find_unused_cd(zap
, zn
->zn_hash
);
756 /* given the limited size of the microzap, this can't happen */
757 ASSERT(cd
!= ZAP_MAXCD
);
760 for (i
= start
; i
< zap
->zap_m
.zap_num_chunks
; i
++) {
761 mzap_ent_phys_t
*mze
= &zap
->zap_m
.zap_phys
->mz_chunk
[i
];
762 if (mze
->mze_name
[0] == 0) {
763 mze
->mze_value
= value
;
765 (void) strcpy(mze
->mze_name
, zn
->zn_name_orij
);
766 zap
->zap_m
.zap_num_entries
++;
767 zap
->zap_m
.zap_alloc_next
= i
+1;
768 if (zap
->zap_m
.zap_alloc_next
==
769 zap
->zap_m
.zap_num_chunks
)
770 zap
->zap_m
.zap_alloc_next
= 0;
771 mze_insert(zap
, i
, zn
->zn_hash
, mze
);
779 ASSERT(!"out of entries!");
783 zap_add(objset_t
*os
, uint64_t zapobj
, const char *name
,
784 int integer_size
, uint64_t num_integers
,
785 const void *val
, dmu_tx_t
*tx
)
790 const uint64_t *intval
= val
;
793 err
= zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, TRUE
, &zap
);
796 zn
= zap_name_alloc(zap
, name
, MT_EXACT
);
801 if (!zap
->zap_ismicro
) {
802 err
= fzap_add(zn
, integer_size
, num_integers
, val
, tx
);
803 zap
= zn
->zn_zap
; /* fzap_add() may change zap */
804 } else if (integer_size
!= 8 || num_integers
!= 1 ||
805 strlen(name
) >= MZAP_NAME_LEN
) {
806 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
807 zapobj
, integer_size
, num_integers
, name
);
808 err
= mzap_upgrade(&zn
->zn_zap
, tx
);
810 err
= fzap_add(zn
, integer_size
, num_integers
, val
, tx
);
811 zap
= zn
->zn_zap
; /* fzap_add() may change zap */
817 mzap_addent(zn
, *intval
);
820 ASSERT(zap
== zn
->zn_zap
);
822 if (zap
!= NULL
) /* may be NULL if fzap_add() failed */
828 zap_update(objset_t
*os
, uint64_t zapobj
, const char *name
,
829 int integer_size
, uint64_t num_integers
, const void *val
, dmu_tx_t
*tx
)
833 const uint64_t *intval
= val
;
837 err
= zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, TRUE
, &zap
);
840 zn
= zap_name_alloc(zap
, name
, MT_EXACT
);
845 if (!zap
->zap_ismicro
) {
846 err
= fzap_update(zn
, integer_size
, num_integers
, val
, tx
);
847 zap
= zn
->zn_zap
; /* fzap_update() may change zap */
848 } else if (integer_size
!= 8 || num_integers
!= 1 ||
849 strlen(name
) >= MZAP_NAME_LEN
) {
850 dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
851 zapobj
, integer_size
, num_integers
, name
);
852 err
= mzap_upgrade(&zn
->zn_zap
, tx
);
854 err
= fzap_update(zn
, integer_size
, num_integers
,
856 zap
= zn
->zn_zap
; /* fzap_update() may change zap */
860 mze
->mze_phys
.mze_value
= *intval
;
861 zap
->zap_m
.zap_phys
->mz_chunk
862 [mze
->mze_chunkid
].mze_value
= *intval
;
864 mzap_addent(zn
, *intval
);
867 ASSERT(zap
== zn
->zn_zap
);
869 if (zap
!= NULL
) /* may be NULL if fzap_upgrade() failed */
875 zap_remove(objset_t
*os
, uint64_t zapobj
, const char *name
, dmu_tx_t
*tx
)
877 return (zap_remove_norm(os
, zapobj
, name
, MT_EXACT
, tx
));
881 zap_remove_norm(objset_t
*os
, uint64_t zapobj
, const char *name
,
882 matchtype_t mt
, dmu_tx_t
*tx
)
889 err
= zap_lockdir(os
, zapobj
, tx
, RW_WRITER
, TRUE
, FALSE
, &zap
);
892 zn
= zap_name_alloc(zap
, name
, mt
);
897 if (!zap
->zap_ismicro
) {
898 err
= fzap_remove(zn
, tx
);
904 zap
->zap_m
.zap_num_entries
--;
905 bzero(&zap
->zap_m
.zap_phys
->mz_chunk
[mze
->mze_chunkid
],
906 sizeof (mzap_ent_phys_t
));
907 mze_remove(zap
, mze
);
916 * Routines for iterating over the attributes.
920 * We want to keep the high 32 bits of the cursor zero if we can, so
921 * that 32-bit programs can access this. So use a small hash value so
922 * we can fit 4 bits of cd into the 32-bit cursor.
924 * [ 4 zero bits | 32-bit collision differentiator | 28-bit hash value ]
927 zap_cursor_init_serialized(zap_cursor_t
*zc
, objset_t
*os
, uint64_t zapobj
,
933 zc
->zc_zapobj
= zapobj
;
934 if (serialized
== -1ULL) {
938 zc
->zc_hash
= serialized
<< (64-ZAP_HASHBITS
);
939 zc
->zc_cd
= serialized
>> ZAP_HASHBITS
;
940 if (zc
->zc_cd
>= ZAP_MAXCD
) /* corrupt serialized */
946 zap_cursor_init(zap_cursor_t
*zc
, objset_t
*os
, uint64_t zapobj
)
948 zap_cursor_init_serialized(zc
, os
, zapobj
, 0);
952 zap_cursor_fini(zap_cursor_t
*zc
)
955 rw_enter(&zc
->zc_zap
->zap_rwlock
, RW_READER
);
956 zap_unlockdir(zc
->zc_zap
);
960 rw_enter(&zc
->zc_leaf
->l_rwlock
, RW_READER
);
961 zap_put_leaf(zc
->zc_leaf
);
964 zc
->zc_objset
= NULL
;
968 zap_cursor_serialize(zap_cursor_t
*zc
)
970 if (zc
->zc_hash
== -1ULL)
972 ASSERT((zc
->zc_hash
& (ZAP_MAXCD
-1)) == 0);
973 ASSERT(zc
->zc_cd
< ZAP_MAXCD
);
974 return ((zc
->zc_hash
>> (64-ZAP_HASHBITS
)) |
975 ((uint64_t)zc
->zc_cd
<< ZAP_HASHBITS
));
979 zap_cursor_retrieve(zap_cursor_t
*zc
, zap_attribute_t
*za
)
983 mzap_ent_t mze_tofind
;
986 if (zc
->zc_hash
== -1ULL)
989 if (zc
->zc_zap
== NULL
) {
990 err
= zap_lockdir(zc
->zc_objset
, zc
->zc_zapobj
, NULL
,
991 RW_READER
, TRUE
, FALSE
, &zc
->zc_zap
);
995 rw_enter(&zc
->zc_zap
->zap_rwlock
, RW_READER
);
997 if (!zc
->zc_zap
->zap_ismicro
) {
998 err
= fzap_cursor_retrieve(zc
->zc_zap
, zc
, za
);
1002 mze_tofind
.mze_hash
= zc
->zc_hash
;
1003 mze_tofind
.mze_phys
.mze_cd
= zc
->zc_cd
;
1005 mze
= avl_find(&zc
->zc_zap
->zap_m
.zap_avl
, &mze_tofind
, &idx
);
1007 mze
= avl_nearest(&zc
->zc_zap
->zap_m
.zap_avl
,
1011 ASSERT(0 == bcmp(&mze
->mze_phys
,
1012 &zc
->zc_zap
->zap_m
.zap_phys
->mz_chunk
1013 [mze
->mze_chunkid
], sizeof (mze
->mze_phys
)));
1015 za
->za_normalization_conflict
=
1016 mzap_normalization_conflict(zc
->zc_zap
, NULL
, mze
);
1017 za
->za_integer_length
= 8;
1018 za
->za_num_integers
= 1;
1019 za
->za_first_integer
= mze
->mze_phys
.mze_value
;
1020 (void) strcpy(za
->za_name
, mze
->mze_phys
.mze_name
);
1021 zc
->zc_hash
= mze
->mze_hash
;
1022 zc
->zc_cd
= mze
->mze_phys
.mze_cd
;
1025 zc
->zc_hash
= -1ULL;
1028 rw_exit(&zc
->zc_zap
->zap_rwlock
);
1033 zap_cursor_advance(zap_cursor_t
*zc
)
1035 if (zc
->zc_hash
== -1ULL)
1038 if (zc
->zc_cd
>= ZAP_MAXCD
) {
1040 zc
->zc_hash
+= 1ULL<<(64-ZAP_HASHBITS
);
1041 if (zc
->zc_hash
== 0) /* EOF */
1042 zc
->zc_hash
= -1ULL;
1047 zap_get_stats(objset_t
*os
, uint64_t zapobj
, zap_stats_t
*zs
)
1052 err
= zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, &zap
);
1056 bzero(zs
, sizeof (zap_stats_t
));
1058 if (zap
->zap_ismicro
) {
1059 zs
->zs_blocksize
= zap
->zap_dbuf
->db_size
;
1060 zs
->zs_num_entries
= zap
->zap_m
.zap_num_entries
;
1061 zs
->zs_num_blocks
= 1;
1063 fzap_get_stats(zap
, zs
);
1070 zap_count_write(objset_t
*os
, uint64_t zapobj
, const char *name
, int add
,
1071 uint64_t *towrite
, uint64_t *tooverwrite
)
1078 * Since we don't have a name, we cannot figure out which blocks will
1079 * be affected in this operation. So, account for the worst case :
1080 * - 3 blocks overwritten: target leaf, ptrtbl block, header block
1081 * - 4 new blocks written if adding:
1082 * - 2 blocks for possibly split leaves,
1083 * - 2 grown ptrtbl blocks
1085 * This also accommodates the case where an add operation to a fairly
1086 * large microzap results in a promotion to fatzap.
1089 *towrite
+= (3 + (add
? 4 : 0)) * SPA_MAXBLOCKSIZE
;
1094 * We lock the zap with adding == FALSE because, if we pass
1095 * the actual value of add, it could trigger a mzap_upgrade().
1096 * At present we are just evaluating the possibility of this operation
1097 * and hence we do not want to trigger an upgrade.
1099 err
= zap_lockdir(os
, zapobj
, NULL
, RW_READER
, TRUE
, FALSE
, &zap
);
1103 if (!zap
->zap_ismicro
) {
1104 zap_name_t
*zn
= zap_name_alloc(zap
, name
, MT_EXACT
);
1106 err
= fzap_count_write(zn
, add
, towrite
,
1111 * We treat this case as similar to (name == NULL)
1113 *towrite
+= (3 + (add
? 4 : 0)) * SPA_MAXBLOCKSIZE
;
1117 * We are here if (name != NULL) and this is a micro-zap.
1118 * We account for the header block depending on whether it
1121 * In case of an add operation, it is hard to find out
1122 * if this add will promote this microzap to fatzap.
1123 * Hence, we consider the worst case and account for the
1124 * blocks assuming this microzap would be promoted to a
1127 * 1 block overwritten : header block
1128 * 4 new blocks written : 2 new split leaf, 2 grown
1131 if (dmu_buf_freeable(zap
->zap_dbuf
))
1132 *tooverwrite
+= SPA_MAXBLOCKSIZE
;
1134 *towrite
+= SPA_MAXBLOCKSIZE
;
1137 *towrite
+= 4 * SPA_MAXBLOCKSIZE
;