]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
9ae529ec | 23 | * Copyright (c) 2012 by Delphix. All rights reserved. |
34dc7c2f BB |
24 | */ |
25 | ||
428870ff | 26 | #include <sys/zio.h> |
34dc7c2f BB |
27 | #include <sys/spa.h> |
28 | #include <sys/dmu.h> | |
29 | #include <sys/zfs_context.h> | |
30 | #include <sys/zap.h> | |
31 | #include <sys/refcount.h> | |
32 | #include <sys/zap_impl.h> | |
33 | #include <sys/zap_leaf.h> | |
34 | #include <sys/avl.h> | |
428870ff | 35 | #include <sys/arc.h> |
34dc7c2f BB |
36 | |
37 | #ifdef _KERNEL | |
38 | #include <sys/sunddi.h> | |
39 | #endif | |
40 | ||
428870ff | 41 | static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); |
34dc7c2f | 42 | |
428870ff BB |
43 | uint64_t |
44 | zap_getflags(zap_t *zap) | |
45 | { | |
46 | if (zap->zap_ismicro) | |
47 | return (0); | |
48 | return (zap->zap_u.zap_fat.zap_phys->zap_flags); | |
49 | } | |
34dc7c2f | 50 | |
428870ff BB |
51 | int |
52 | zap_hashbits(zap_t *zap) | |
34dc7c2f | 53 | { |
428870ff BB |
54 | if (zap_getflags(zap) & ZAP_FLAG_HASH64) |
55 | return (48); | |
56 | else | |
57 | return (28); | |
58 | } | |
34dc7c2f | 59 | |
428870ff BB |
60 | uint32_t |
61 | zap_maxcd(zap_t *zap) | |
62 | { | |
63 | if (zap_getflags(zap) & ZAP_FLAG_HASH64) | |
64 | return ((1<<16)-1); | |
65 | else | |
66 | return (-1U); | |
67 | } | |
34dc7c2f | 68 | |
428870ff BB |
69 | static uint64_t |
70 | zap_hash(zap_name_t *zn) | |
71 | { | |
72 | zap_t *zap = zn->zn_zap; | |
73 | uint64_t h = 0; | |
34dc7c2f | 74 | |
428870ff BB |
75 | if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { |
76 | ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); | |
77 | h = *(uint64_t *)zn->zn_key_orig; | |
78 | } else { | |
79 | h = zap->zap_salt; | |
80 | ASSERT(h != 0); | |
81 | ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); | |
82 | ||
83 | if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { | |
84 | int i; | |
85 | const uint64_t *wp = zn->zn_key_norm; | |
86 | ||
87 | ASSERT(zn->zn_key_intlen == 8); | |
88 | for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { | |
89 | int j; | |
90 | uint64_t word = *wp; | |
91 | ||
92 | for (j = 0; j < zn->zn_key_intlen; j++) { | |
93 | h = (h >> 8) ^ | |
94 | zfs_crc64_table[(h ^ word) & 0xFF]; | |
95 | word >>= NBBY; | |
96 | } | |
97 | } | |
98 | } else { | |
99 | int i, len; | |
100 | const uint8_t *cp = zn->zn_key_norm; | |
101 | ||
102 | /* | |
103 | * We previously stored the terminating null on | |
104 | * disk, but didn't hash it, so we need to | |
105 | * continue to not hash it. (The | |
106 | * zn_key_*_numints includes the terminating | |
107 | * null for non-binary keys.) | |
108 | */ | |
109 | len = zn->zn_key_norm_numints - 1; | |
110 | ||
111 | ASSERT(zn->zn_key_intlen == 1); | |
112 | for (i = 0; i < len; cp++, i++) { | |
113 | h = (h >> 8) ^ | |
114 | zfs_crc64_table[(h ^ *cp) & 0xFF]; | |
115 | } | |
116 | } | |
117 | } | |
34dc7c2f | 118 | /* |
428870ff BB |
119 | * Don't use all 64 bits, since we need some in the cookie for |
120 | * the collision differentiator. We MUST use the high bits, | |
121 | * since those are the ones that we first pay attention to when | |
34dc7c2f BB |
122 | * chosing the bucket. |
123 | */ | |
428870ff | 124 | h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); |
34dc7c2f | 125 | |
428870ff | 126 | return (h); |
34dc7c2f BB |
127 | } |
128 | ||
129 | static int | |
130 | zap_normalize(zap_t *zap, const char *name, char *namenorm) | |
131 | { | |
132 | size_t inlen, outlen; | |
133 | int err; | |
134 | ||
428870ff BB |
135 | ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); |
136 | ||
34dc7c2f BB |
137 | inlen = strlen(name) + 1; |
138 | outlen = ZAP_MAXNAMELEN; | |
139 | ||
140 | err = 0; | |
141 | (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, | |
9babb374 BB |
142 | zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | |
143 | U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); | |
34dc7c2f BB |
144 | |
145 | return (err); | |
146 | } | |
147 | ||
148 | boolean_t | |
149 | zap_match(zap_name_t *zn, const char *matchname) | |
150 | { | |
428870ff BB |
151 | ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); |
152 | ||
34dc7c2f BB |
153 | if (zn->zn_matchtype == MT_FIRST) { |
154 | char norm[ZAP_MAXNAMELEN]; | |
155 | ||
156 | if (zap_normalize(zn->zn_zap, matchname, norm) != 0) | |
157 | return (B_FALSE); | |
158 | ||
428870ff | 159 | return (strcmp(zn->zn_key_norm, norm) == 0); |
34dc7c2f BB |
160 | } else { |
161 | /* MT_BEST or MT_EXACT */ | |
428870ff | 162 | return (strcmp(zn->zn_key_orig, matchname) == 0); |
34dc7c2f BB |
163 | } |
164 | } | |
165 | ||
166 | void | |
167 | zap_name_free(zap_name_t *zn) | |
168 | { | |
169 | kmem_free(zn, sizeof (zap_name_t)); | |
170 | } | |
171 | ||
34dc7c2f | 172 | zap_name_t * |
428870ff | 173 | zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) |
34dc7c2f | 174 | { |
b8d06fca | 175 | zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_PUSHPAGE); |
34dc7c2f BB |
176 | |
177 | zn->zn_zap = zap; | |
428870ff BB |
178 | zn->zn_key_intlen = sizeof (*key); |
179 | zn->zn_key_orig = key; | |
180 | zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; | |
34dc7c2f BB |
181 | zn->zn_matchtype = mt; |
182 | if (zap->zap_normflags) { | |
428870ff | 183 | if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { |
34dc7c2f BB |
184 | zap_name_free(zn); |
185 | return (NULL); | |
186 | } | |
428870ff BB |
187 | zn->zn_key_norm = zn->zn_normbuf; |
188 | zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; | |
34dc7c2f BB |
189 | } else { |
190 | if (mt != MT_EXACT) { | |
191 | zap_name_free(zn); | |
192 | return (NULL); | |
193 | } | |
428870ff BB |
194 | zn->zn_key_norm = zn->zn_key_orig; |
195 | zn->zn_key_norm_numints = zn->zn_key_orig_numints; | |
34dc7c2f BB |
196 | } |
197 | ||
428870ff BB |
198 | zn->zn_hash = zap_hash(zn); |
199 | return (zn); | |
200 | } | |
201 | ||
202 | zap_name_t * | |
203 | zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) | |
204 | { | |
cafa9709 | 205 | zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_PUSHPAGE); |
428870ff BB |
206 | |
207 | ASSERT(zap->zap_normflags == 0); | |
208 | zn->zn_zap = zap; | |
209 | zn->zn_key_intlen = sizeof (*key); | |
210 | zn->zn_key_orig = zn->zn_key_norm = key; | |
211 | zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; | |
212 | zn->zn_matchtype = MT_EXACT; | |
213 | ||
214 | zn->zn_hash = zap_hash(zn); | |
34dc7c2f BB |
215 | return (zn); |
216 | } | |
217 | ||
218 | static void | |
219 | mzap_byteswap(mzap_phys_t *buf, size_t size) | |
220 | { | |
221 | int i, max; | |
222 | buf->mz_block_type = BSWAP_64(buf->mz_block_type); | |
223 | buf->mz_salt = BSWAP_64(buf->mz_salt); | |
224 | buf->mz_normflags = BSWAP_64(buf->mz_normflags); | |
225 | max = (size / MZAP_ENT_LEN) - 1; | |
226 | for (i = 0; i < max; i++) { | |
227 | buf->mz_chunk[i].mze_value = | |
228 | BSWAP_64(buf->mz_chunk[i].mze_value); | |
229 | buf->mz_chunk[i].mze_cd = | |
230 | BSWAP_32(buf->mz_chunk[i].mze_cd); | |
231 | } | |
232 | } | |
233 | ||
234 | void | |
235 | zap_byteswap(void *buf, size_t size) | |
236 | { | |
237 | uint64_t block_type; | |
238 | ||
239 | block_type = *(uint64_t *)buf; | |
240 | ||
241 | if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { | |
242 | /* ASSERT(magic == ZAP_LEAF_MAGIC); */ | |
243 | mzap_byteswap(buf, size); | |
244 | } else { | |
245 | fzap_byteswap(buf, size); | |
246 | } | |
247 | } | |
248 | ||
249 | static int | |
250 | mze_compare(const void *arg1, const void *arg2) | |
251 | { | |
252 | const mzap_ent_t *mze1 = arg1; | |
253 | const mzap_ent_t *mze2 = arg2; | |
254 | ||
255 | if (mze1->mze_hash > mze2->mze_hash) | |
256 | return (+1); | |
257 | if (mze1->mze_hash < mze2->mze_hash) | |
258 | return (-1); | |
428870ff | 259 | if (mze1->mze_cd > mze2->mze_cd) |
34dc7c2f | 260 | return (+1); |
428870ff | 261 | if (mze1->mze_cd < mze2->mze_cd) |
34dc7c2f BB |
262 | return (-1); |
263 | return (0); | |
264 | } | |
265 | ||
266 | static void | |
428870ff | 267 | mze_insert(zap_t *zap, int chunkid, uint64_t hash) |
34dc7c2f BB |
268 | { |
269 | mzap_ent_t *mze; | |
270 | ||
271 | ASSERT(zap->zap_ismicro); | |
272 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); | |
34dc7c2f | 273 | |
b8d06fca | 274 | mze = kmem_alloc(sizeof (mzap_ent_t), KM_PUSHPAGE); |
34dc7c2f BB |
275 | mze->mze_chunkid = chunkid; |
276 | mze->mze_hash = hash; | |
428870ff BB |
277 | mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; |
278 | ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); | |
34dc7c2f BB |
279 | avl_add(&zap->zap_m.zap_avl, mze); |
280 | } | |
281 | ||
282 | static mzap_ent_t * | |
283 | mze_find(zap_name_t *zn) | |
284 | { | |
285 | mzap_ent_t mze_tofind; | |
286 | mzap_ent_t *mze; | |
287 | avl_index_t idx; | |
288 | avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; | |
289 | ||
290 | ASSERT(zn->zn_zap->zap_ismicro); | |
291 | ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); | |
292 | ||
34dc7c2f | 293 | mze_tofind.mze_hash = zn->zn_hash; |
428870ff | 294 | mze_tofind.mze_cd = 0; |
34dc7c2f BB |
295 | |
296 | again: | |
297 | mze = avl_find(avl, &mze_tofind, &idx); | |
298 | if (mze == NULL) | |
299 | mze = avl_nearest(avl, idx, AVL_AFTER); | |
300 | for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { | |
428870ff BB |
301 | ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); |
302 | if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) | |
34dc7c2f BB |
303 | return (mze); |
304 | } | |
305 | if (zn->zn_matchtype == MT_BEST) { | |
306 | zn->zn_matchtype = MT_FIRST; | |
307 | goto again; | |
308 | } | |
309 | return (NULL); | |
310 | } | |
311 | ||
312 | static uint32_t | |
313 | mze_find_unused_cd(zap_t *zap, uint64_t hash) | |
314 | { | |
315 | mzap_ent_t mze_tofind; | |
316 | mzap_ent_t *mze; | |
317 | avl_index_t idx; | |
318 | avl_tree_t *avl = &zap->zap_m.zap_avl; | |
319 | uint32_t cd; | |
320 | ||
321 | ASSERT(zap->zap_ismicro); | |
322 | ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); | |
323 | ||
324 | mze_tofind.mze_hash = hash; | |
428870ff | 325 | mze_tofind.mze_cd = 0; |
34dc7c2f BB |
326 | |
327 | cd = 0; | |
328 | for (mze = avl_find(avl, &mze_tofind, &idx); | |
329 | mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { | |
428870ff | 330 | if (mze->mze_cd != cd) |
34dc7c2f BB |
331 | break; |
332 | cd++; | |
333 | } | |
334 | ||
335 | return (cd); | |
336 | } | |
337 | ||
338 | static void | |
339 | mze_remove(zap_t *zap, mzap_ent_t *mze) | |
340 | { | |
341 | ASSERT(zap->zap_ismicro); | |
342 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); | |
343 | ||
344 | avl_remove(&zap->zap_m.zap_avl, mze); | |
345 | kmem_free(mze, sizeof (mzap_ent_t)); | |
346 | } | |
347 | ||
348 | static void | |
349 | mze_destroy(zap_t *zap) | |
350 | { | |
351 | mzap_ent_t *mze; | |
352 | void *avlcookie = NULL; | |
353 | ||
c65aa5b2 | 354 | while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))) |
34dc7c2f BB |
355 | kmem_free(mze, sizeof (mzap_ent_t)); |
356 | avl_destroy(&zap->zap_m.zap_avl); | |
357 | } | |
358 | ||
359 | static zap_t * | |
360 | mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) | |
361 | { | |
362 | zap_t *winner; | |
363 | zap_t *zap; | |
364 | int i; | |
365 | ||
366 | ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); | |
367 | ||
b8d06fca | 368 | zap = kmem_zalloc(sizeof (zap_t), KM_PUSHPAGE); |
ef5319df | 369 | rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL); |
34dc7c2f BB |
370 | rw_enter(&zap->zap_rwlock, RW_WRITER); |
371 | zap->zap_objset = os; | |
372 | zap->zap_object = obj; | |
373 | zap->zap_dbuf = db; | |
374 | ||
375 | if (*(uint64_t *)db->db_data != ZBT_MICRO) { | |
376 | mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); | |
377 | zap->zap_f.zap_block_shift = highbit(db->db_size) - 1; | |
378 | } else { | |
379 | zap->zap_ismicro = TRUE; | |
380 | } | |
381 | ||
382 | /* | |
383 | * Make sure that zap_ismicro is set before we let others see | |
384 | * it, because zap_lockdir() checks zap_ismicro without the lock | |
385 | * held. | |
386 | */ | |
387 | winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict); | |
388 | ||
389 | if (winner != NULL) { | |
390 | rw_exit(&zap->zap_rwlock); | |
391 | rw_destroy(&zap->zap_rwlock); | |
392 | if (!zap->zap_ismicro) | |
393 | mutex_destroy(&zap->zap_f.zap_num_entries_mtx); | |
394 | kmem_free(zap, sizeof (zap_t)); | |
395 | return (winner); | |
396 | } | |
397 | ||
398 | if (zap->zap_ismicro) { | |
399 | zap->zap_salt = zap->zap_m.zap_phys->mz_salt; | |
400 | zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags; | |
401 | zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; | |
402 | avl_create(&zap->zap_m.zap_avl, mze_compare, | |
403 | sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); | |
404 | ||
405 | for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { | |
406 | mzap_ent_phys_t *mze = | |
407 | &zap->zap_m.zap_phys->mz_chunk[i]; | |
408 | if (mze->mze_name[0]) { | |
409 | zap_name_t *zn; | |
410 | ||
411 | zap->zap_m.zap_num_entries++; | |
412 | zn = zap_name_alloc(zap, mze->mze_name, | |
413 | MT_EXACT); | |
428870ff | 414 | mze_insert(zap, i, zn->zn_hash); |
34dc7c2f BB |
415 | zap_name_free(zn); |
416 | } | |
417 | } | |
418 | } else { | |
419 | zap->zap_salt = zap->zap_f.zap_phys->zap_salt; | |
420 | zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags; | |
421 | ||
422 | ASSERT3U(sizeof (struct zap_leaf_header), ==, | |
423 | 2*ZAP_LEAF_CHUNKSIZE); | |
424 | ||
425 | /* | |
426 | * The embedded pointer table should not overlap the | |
427 | * other members. | |
428 | */ | |
429 | ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, | |
430 | &zap->zap_f.zap_phys->zap_salt); | |
431 | ||
432 | /* | |
433 | * The embedded pointer table should end at the end of | |
434 | * the block | |
435 | */ | |
436 | ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, | |
437 | 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - | |
438 | (uintptr_t)zap->zap_f.zap_phys, ==, | |
439 | zap->zap_dbuf->db_size); | |
440 | } | |
441 | rw_exit(&zap->zap_rwlock); | |
442 | return (zap); | |
443 | } | |
444 | ||
445 | int | |
446 | zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, | |
447 | krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) | |
448 | { | |
449 | zap_t *zap; | |
450 | dmu_buf_t *db; | |
451 | krw_t lt; | |
452 | int err; | |
453 | ||
454 | *zapp = NULL; | |
455 | ||
428870ff | 456 | err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); |
34dc7c2f BB |
457 | if (err) |
458 | return (err); | |
459 | ||
460 | #ifdef ZFS_DEBUG | |
461 | { | |
462 | dmu_object_info_t doi; | |
463 | dmu_object_info_from_db(db, &doi); | |
9ae529ec | 464 | ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); |
34dc7c2f BB |
465 | } |
466 | #endif | |
467 | ||
468 | zap = dmu_buf_get_user(db); | |
469 | if (zap == NULL) | |
470 | zap = mzap_open(os, obj, db); | |
471 | ||
472 | /* | |
473 | * We're checking zap_ismicro without the lock held, in order to | |
474 | * tell what type of lock we want. Once we have some sort of | |
475 | * lock, see if it really is the right type. In practice this | |
476 | * can only be different if it was upgraded from micro to fat, | |
477 | * and micro wanted WRITER but fat only needs READER. | |
478 | */ | |
479 | lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; | |
480 | rw_enter(&zap->zap_rwlock, lt); | |
481 | if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { | |
482 | /* it was upgraded, now we only need reader */ | |
483 | ASSERT(lt == RW_WRITER); | |
484 | ASSERT(RW_READER == | |
485 | (!zap->zap_ismicro && fatreader) ? RW_READER : lti); | |
486 | rw_downgrade(&zap->zap_rwlock); | |
487 | lt = RW_READER; | |
488 | } | |
489 | ||
490 | zap->zap_objset = os; | |
491 | ||
492 | if (lt == RW_WRITER) | |
493 | dmu_buf_will_dirty(db, tx); | |
494 | ||
495 | ASSERT3P(zap->zap_dbuf, ==, db); | |
496 | ||
497 | ASSERT(!zap->zap_ismicro || | |
498 | zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); | |
499 | if (zap->zap_ismicro && tx && adding && | |
500 | zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { | |
501 | uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; | |
502 | if (newsz > MZAP_MAX_BLKSZ) { | |
503 | dprintf("upgrading obj %llu: num_entries=%u\n", | |
504 | obj, zap->zap_m.zap_num_entries); | |
505 | *zapp = zap; | |
428870ff | 506 | return (mzap_upgrade(zapp, tx, 0)); |
34dc7c2f BB |
507 | } |
508 | err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); | |
c99c9001 | 509 | ASSERT0(err); |
34dc7c2f BB |
510 | zap->zap_m.zap_num_chunks = |
511 | db->db_size / MZAP_ENT_LEN - 1; | |
512 | } | |
513 | ||
514 | *zapp = zap; | |
515 | return (0); | |
516 | } | |
517 | ||
518 | void | |
519 | zap_unlockdir(zap_t *zap) | |
520 | { | |
521 | rw_exit(&zap->zap_rwlock); | |
522 | dmu_buf_rele(zap->zap_dbuf, NULL); | |
523 | } | |
524 | ||
525 | static int | |
428870ff | 526 | mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags) |
34dc7c2f BB |
527 | { |
528 | mzap_phys_t *mzp; | |
428870ff BB |
529 | int i, sz, nchunks; |
530 | int err = 0; | |
34dc7c2f BB |
531 | zap_t *zap = *zapp; |
532 | ||
533 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); | |
534 | ||
535 | sz = zap->zap_dbuf->db_size; | |
991fc1d7 | 536 | mzp = kmem_alloc(sz, KM_PUSHPAGE | KM_NODEBUG); |
34dc7c2f BB |
537 | bcopy(zap->zap_dbuf->db_data, mzp, sz); |
538 | nchunks = zap->zap_m.zap_num_chunks; | |
539 | ||
428870ff BB |
540 | if (!flags) { |
541 | err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, | |
542 | 1ULL << fzap_default_block_shift, 0, tx); | |
543 | if (err) { | |
991fc1d7 | 544 | kmem_free(mzp, sz); |
428870ff BB |
545 | return (err); |
546 | } | |
34dc7c2f BB |
547 | } |
548 | ||
549 | dprintf("upgrading obj=%llu with %u chunks\n", | |
550 | zap->zap_object, nchunks); | |
551 | /* XXX destroy the avl later, so we can use the stored hash value */ | |
552 | mze_destroy(zap); | |
553 | ||
428870ff | 554 | fzap_upgrade(zap, tx, flags); |
34dc7c2f BB |
555 | |
556 | for (i = 0; i < nchunks; i++) { | |
34dc7c2f BB |
557 | mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; |
558 | zap_name_t *zn; | |
559 | if (mze->mze_name[0] == 0) | |
560 | continue; | |
561 | dprintf("adding %s=%llu\n", | |
562 | mze->mze_name, mze->mze_value); | |
563 | zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT); | |
564 | err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx); | |
565 | zap = zn->zn_zap; /* fzap_add_cd() may change zap */ | |
566 | zap_name_free(zn); | |
567 | if (err) | |
568 | break; | |
569 | } | |
991fc1d7 | 570 | kmem_free(mzp, sz); |
34dc7c2f BB |
571 | *zapp = zap; |
572 | return (err); | |
573 | } | |
574 | ||
575 | static void | |
428870ff BB |
576 | mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, |
577 | dmu_tx_t *tx) | |
34dc7c2f BB |
578 | { |
579 | dmu_buf_t *db; | |
580 | mzap_phys_t *zp; | |
581 | ||
428870ff | 582 | VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); |
34dc7c2f BB |
583 | |
584 | #ifdef ZFS_DEBUG | |
585 | { | |
586 | dmu_object_info_t doi; | |
587 | dmu_object_info_from_db(db, &doi); | |
9ae529ec | 588 | ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); |
34dc7c2f BB |
589 | } |
590 | #endif | |
591 | ||
592 | dmu_buf_will_dirty(db, tx); | |
593 | zp = db->db_data; | |
594 | zp->mz_block_type = ZBT_MICRO; | |
595 | zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; | |
596 | zp->mz_normflags = normflags; | |
597 | dmu_buf_rele(db, FTAG); | |
428870ff BB |
598 | |
599 | if (flags != 0) { | |
600 | zap_t *zap; | |
601 | /* Only fat zap supports flags; upgrade immediately. */ | |
602 | VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, | |
603 | B_FALSE, B_FALSE, &zap)); | |
604 | VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); | |
605 | zap_unlockdir(zap); | |
606 | } | |
34dc7c2f BB |
607 | } |
608 | ||
609 | int | |
610 | zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, | |
611 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
612 | { | |
613 | return (zap_create_claim_norm(os, obj, | |
614 | 0, ot, bonustype, bonuslen, tx)); | |
615 | } | |
616 | ||
617 | int | |
618 | zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, | |
619 | dmu_object_type_t ot, | |
620 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
621 | { | |
622 | int err; | |
623 | ||
624 | err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); | |
625 | if (err != 0) | |
626 | return (err); | |
428870ff | 627 | mzap_create_impl(os, obj, normflags, 0, tx); |
34dc7c2f BB |
628 | return (0); |
629 | } | |
630 | ||
631 | uint64_t | |
632 | zap_create(objset_t *os, dmu_object_type_t ot, | |
633 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
634 | { | |
635 | return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); | |
636 | } | |
637 | ||
638 | uint64_t | |
639 | zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, | |
640 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
641 | { | |
642 | uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); | |
643 | ||
428870ff BB |
644 | mzap_create_impl(os, obj, normflags, 0, tx); |
645 | return (obj); | |
646 | } | |
647 | ||
648 | uint64_t | |
649 | zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, | |
650 | dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, | |
651 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
652 | { | |
653 | uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); | |
654 | ||
655 | ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && | |
656 | leaf_blockshift <= SPA_MAXBLOCKSHIFT && | |
657 | indirect_blockshift >= SPA_MINBLOCKSHIFT && | |
658 | indirect_blockshift <= SPA_MAXBLOCKSHIFT); | |
659 | ||
660 | VERIFY(dmu_object_set_blocksize(os, obj, | |
661 | 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); | |
662 | ||
663 | mzap_create_impl(os, obj, normflags, flags, tx); | |
34dc7c2f BB |
664 | return (obj); |
665 | } | |
666 | ||
667 | int | |
668 | zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) | |
669 | { | |
670 | /* | |
671 | * dmu_object_free will free the object number and free the | |
672 | * data. Freeing the data will cause our pageout function to be | |
673 | * called, which will destroy our data (zap_leaf_t's and zap_t). | |
674 | */ | |
675 | ||
676 | return (dmu_object_free(os, zapobj, tx)); | |
677 | } | |
678 | ||
679 | _NOTE(ARGSUSED(0)) | |
680 | void | |
681 | zap_evict(dmu_buf_t *db, void *vzap) | |
682 | { | |
683 | zap_t *zap = vzap; | |
684 | ||
685 | rw_destroy(&zap->zap_rwlock); | |
686 | ||
687 | if (zap->zap_ismicro) | |
688 | mze_destroy(zap); | |
689 | else | |
690 | mutex_destroy(&zap->zap_f.zap_num_entries_mtx); | |
691 | ||
692 | kmem_free(zap, sizeof (zap_t)); | |
693 | } | |
694 | ||
695 | int | |
696 | zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) | |
697 | { | |
698 | zap_t *zap; | |
699 | int err; | |
700 | ||
701 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
702 | if (err) | |
703 | return (err); | |
704 | if (!zap->zap_ismicro) { | |
705 | err = fzap_count(zap, count); | |
706 | } else { | |
707 | *count = zap->zap_m.zap_num_entries; | |
708 | } | |
709 | zap_unlockdir(zap); | |
710 | return (err); | |
711 | } | |
712 | ||
713 | /* | |
714 | * zn may be NULL; if not specified, it will be computed if needed. | |
715 | * See also the comment above zap_entry_normalization_conflict(). | |
716 | */ | |
717 | static boolean_t | |
718 | mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) | |
719 | { | |
720 | mzap_ent_t *other; | |
721 | int direction = AVL_BEFORE; | |
722 | boolean_t allocdzn = B_FALSE; | |
723 | ||
724 | if (zap->zap_normflags == 0) | |
725 | return (B_FALSE); | |
726 | ||
727 | again: | |
728 | for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); | |
729 | other && other->mze_hash == mze->mze_hash; | |
730 | other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { | |
731 | ||
732 | if (zn == NULL) { | |
428870ff | 733 | zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, |
34dc7c2f BB |
734 | MT_FIRST); |
735 | allocdzn = B_TRUE; | |
736 | } | |
428870ff | 737 | if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { |
34dc7c2f BB |
738 | if (allocdzn) |
739 | zap_name_free(zn); | |
740 | return (B_TRUE); | |
741 | } | |
742 | } | |
743 | ||
744 | if (direction == AVL_BEFORE) { | |
745 | direction = AVL_AFTER; | |
746 | goto again; | |
747 | } | |
748 | ||
749 | if (allocdzn) | |
750 | zap_name_free(zn); | |
751 | return (B_FALSE); | |
752 | } | |
753 | ||
754 | /* | |
755 | * Routines for manipulating attributes. | |
756 | */ | |
757 | ||
758 | int | |
759 | zap_lookup(objset_t *os, uint64_t zapobj, const char *name, | |
760 | uint64_t integer_size, uint64_t num_integers, void *buf) | |
761 | { | |
762 | return (zap_lookup_norm(os, zapobj, name, integer_size, | |
763 | num_integers, buf, MT_EXACT, NULL, 0, NULL)); | |
764 | } | |
765 | ||
766 | int | |
767 | zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, | |
768 | uint64_t integer_size, uint64_t num_integers, void *buf, | |
769 | matchtype_t mt, char *realname, int rn_len, | |
770 | boolean_t *ncp) | |
771 | { | |
772 | zap_t *zap; | |
773 | int err; | |
774 | mzap_ent_t *mze; | |
775 | zap_name_t *zn; | |
776 | ||
777 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
778 | if (err) | |
779 | return (err); | |
780 | zn = zap_name_alloc(zap, name, mt); | |
781 | if (zn == NULL) { | |
782 | zap_unlockdir(zap); | |
783 | return (ENOTSUP); | |
784 | } | |
785 | ||
786 | if (!zap->zap_ismicro) { | |
787 | err = fzap_lookup(zn, integer_size, num_integers, buf, | |
788 | realname, rn_len, ncp); | |
789 | } else { | |
790 | mze = mze_find(zn); | |
791 | if (mze == NULL) { | |
792 | err = ENOENT; | |
793 | } else { | |
794 | if (num_integers < 1) { | |
795 | err = EOVERFLOW; | |
796 | } else if (integer_size != 8) { | |
797 | err = EINVAL; | |
798 | } else { | |
428870ff BB |
799 | *(uint64_t *)buf = |
800 | MZE_PHYS(zap, mze)->mze_value; | |
34dc7c2f | 801 | (void) strlcpy(realname, |
428870ff | 802 | MZE_PHYS(zap, mze)->mze_name, rn_len); |
34dc7c2f BB |
803 | if (ncp) { |
804 | *ncp = mzap_normalization_conflict(zap, | |
805 | zn, mze); | |
806 | } | |
807 | } | |
808 | } | |
809 | } | |
810 | zap_name_free(zn); | |
811 | zap_unlockdir(zap); | |
812 | return (err); | |
813 | } | |
814 | ||
428870ff BB |
815 | int |
816 | zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
817 | int key_numints) | |
818 | { | |
819 | zap_t *zap; | |
820 | int err; | |
821 | zap_name_t *zn; | |
822 | ||
823 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
824 | if (err) | |
825 | return (err); | |
826 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
827 | if (zn == NULL) { | |
828 | zap_unlockdir(zap); | |
829 | return (ENOTSUP); | |
830 | } | |
831 | ||
832 | fzap_prefetch(zn); | |
833 | zap_name_free(zn); | |
834 | zap_unlockdir(zap); | |
835 | return (err); | |
836 | } | |
837 | ||
838 | int | |
839 | zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
840 | int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) | |
841 | { | |
842 | zap_t *zap; | |
843 | int err; | |
844 | zap_name_t *zn; | |
845 | ||
846 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
847 | if (err) | |
848 | return (err); | |
849 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
850 | if (zn == NULL) { | |
851 | zap_unlockdir(zap); | |
852 | return (ENOTSUP); | |
853 | } | |
854 | ||
855 | err = fzap_lookup(zn, integer_size, num_integers, buf, | |
856 | NULL, 0, NULL); | |
857 | zap_name_free(zn); | |
858 | zap_unlockdir(zap); | |
859 | return (err); | |
860 | } | |
861 | ||
862 | int | |
863 | zap_contains(objset_t *os, uint64_t zapobj, const char *name) | |
864 | { | |
865 | int err = (zap_lookup_norm(os, zapobj, name, 0, | |
866 | 0, NULL, MT_EXACT, NULL, 0, NULL)); | |
867 | if (err == EOVERFLOW || err == EINVAL) | |
868 | err = 0; /* found, but skipped reading the value */ | |
869 | return (err); | |
870 | } | |
871 | ||
34dc7c2f BB |
872 | int |
873 | zap_length(objset_t *os, uint64_t zapobj, const char *name, | |
874 | uint64_t *integer_size, uint64_t *num_integers) | |
875 | { | |
876 | zap_t *zap; | |
877 | int err; | |
878 | mzap_ent_t *mze; | |
879 | zap_name_t *zn; | |
880 | ||
881 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
882 | if (err) | |
883 | return (err); | |
884 | zn = zap_name_alloc(zap, name, MT_EXACT); | |
885 | if (zn == NULL) { | |
886 | zap_unlockdir(zap); | |
887 | return (ENOTSUP); | |
888 | } | |
889 | if (!zap->zap_ismicro) { | |
890 | err = fzap_length(zn, integer_size, num_integers); | |
891 | } else { | |
892 | mze = mze_find(zn); | |
893 | if (mze == NULL) { | |
894 | err = ENOENT; | |
895 | } else { | |
896 | if (integer_size) | |
897 | *integer_size = 8; | |
898 | if (num_integers) | |
899 | *num_integers = 1; | |
900 | } | |
901 | } | |
902 | zap_name_free(zn); | |
903 | zap_unlockdir(zap); | |
904 | return (err); | |
905 | } | |
906 | ||
428870ff BB |
907 | int |
908 | zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
909 | int key_numints, uint64_t *integer_size, uint64_t *num_integers) | |
910 | { | |
911 | zap_t *zap; | |
912 | int err; | |
913 | zap_name_t *zn; | |
914 | ||
915 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
916 | if (err) | |
917 | return (err); | |
918 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
919 | if (zn == NULL) { | |
920 | zap_unlockdir(zap); | |
921 | return (ENOTSUP); | |
922 | } | |
923 | err = fzap_length(zn, integer_size, num_integers); | |
924 | zap_name_free(zn); | |
925 | zap_unlockdir(zap); | |
926 | return (err); | |
927 | } | |
928 | ||
34dc7c2f BB |
929 | static void |
930 | mzap_addent(zap_name_t *zn, uint64_t value) | |
931 | { | |
932 | int i; | |
933 | zap_t *zap = zn->zn_zap; | |
934 | int start = zap->zap_m.zap_alloc_next; | |
935 | uint32_t cd; | |
936 | ||
34dc7c2f BB |
937 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); |
938 | ||
939 | #ifdef ZFS_DEBUG | |
940 | for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { | |
1fde1e37 | 941 | ASSERTV(mzap_ent_phys_t *mze=&zap->zap_m.zap_phys->mz_chunk[i]); |
428870ff | 942 | ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); |
34dc7c2f BB |
943 | } |
944 | #endif | |
945 | ||
946 | cd = mze_find_unused_cd(zap, zn->zn_hash); | |
947 | /* given the limited size of the microzap, this can't happen */ | |
428870ff | 948 | ASSERT(cd < zap_maxcd(zap)); |
34dc7c2f BB |
949 | |
950 | again: | |
951 | for (i = start; i < zap->zap_m.zap_num_chunks; i++) { | |
952 | mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i]; | |
953 | if (mze->mze_name[0] == 0) { | |
954 | mze->mze_value = value; | |
955 | mze->mze_cd = cd; | |
428870ff | 956 | (void) strcpy(mze->mze_name, zn->zn_key_orig); |
34dc7c2f BB |
957 | zap->zap_m.zap_num_entries++; |
958 | zap->zap_m.zap_alloc_next = i+1; | |
959 | if (zap->zap_m.zap_alloc_next == | |
960 | zap->zap_m.zap_num_chunks) | |
961 | zap->zap_m.zap_alloc_next = 0; | |
428870ff | 962 | mze_insert(zap, i, zn->zn_hash); |
34dc7c2f BB |
963 | return; |
964 | } | |
965 | } | |
966 | if (start != 0) { | |
967 | start = 0; | |
968 | goto again; | |
969 | } | |
970 | ASSERT(!"out of entries!"); | |
971 | } | |
972 | ||
973 | int | |
428870ff | 974 | zap_add(objset_t *os, uint64_t zapobj, const char *key, |
34dc7c2f BB |
975 | int integer_size, uint64_t num_integers, |
976 | const void *val, dmu_tx_t *tx) | |
977 | { | |
978 | zap_t *zap; | |
979 | int err; | |
980 | mzap_ent_t *mze; | |
981 | const uint64_t *intval = val; | |
982 | zap_name_t *zn; | |
983 | ||
984 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); | |
985 | if (err) | |
986 | return (err); | |
428870ff | 987 | zn = zap_name_alloc(zap, key, MT_EXACT); |
34dc7c2f BB |
988 | if (zn == NULL) { |
989 | zap_unlockdir(zap); | |
990 | return (ENOTSUP); | |
991 | } | |
992 | if (!zap->zap_ismicro) { | |
993 | err = fzap_add(zn, integer_size, num_integers, val, tx); | |
994 | zap = zn->zn_zap; /* fzap_add() may change zap */ | |
995 | } else if (integer_size != 8 || num_integers != 1 || | |
428870ff BB |
996 | strlen(key) >= MZAP_NAME_LEN) { |
997 | err = mzap_upgrade(&zn->zn_zap, tx, 0); | |
34dc7c2f BB |
998 | if (err == 0) |
999 | err = fzap_add(zn, integer_size, num_integers, val, tx); | |
1000 | zap = zn->zn_zap; /* fzap_add() may change zap */ | |
1001 | } else { | |
1002 | mze = mze_find(zn); | |
1003 | if (mze != NULL) { | |
1004 | err = EEXIST; | |
1005 | } else { | |
1006 | mzap_addent(zn, *intval); | |
1007 | } | |
1008 | } | |
1009 | ASSERT(zap == zn->zn_zap); | |
1010 | zap_name_free(zn); | |
1011 | if (zap != NULL) /* may be NULL if fzap_add() failed */ | |
1012 | zap_unlockdir(zap); | |
1013 | return (err); | |
1014 | } | |
1015 | ||
428870ff BB |
1016 | int |
1017 | zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1018 | int key_numints, int integer_size, uint64_t num_integers, | |
1019 | const void *val, dmu_tx_t *tx) | |
1020 | { | |
1021 | zap_t *zap; | |
1022 | int err; | |
1023 | zap_name_t *zn; | |
1024 | ||
1025 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); | |
1026 | if (err) | |
1027 | return (err); | |
1028 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
1029 | if (zn == NULL) { | |
1030 | zap_unlockdir(zap); | |
1031 | return (ENOTSUP); | |
1032 | } | |
1033 | err = fzap_add(zn, integer_size, num_integers, val, tx); | |
1034 | zap = zn->zn_zap; /* fzap_add() may change zap */ | |
1035 | zap_name_free(zn); | |
1036 | if (zap != NULL) /* may be NULL if fzap_add() failed */ | |
1037 | zap_unlockdir(zap); | |
1038 | return (err); | |
1039 | } | |
1040 | ||
34dc7c2f BB |
1041 | int |
1042 | zap_update(objset_t *os, uint64_t zapobj, const char *name, | |
1043 | int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) | |
1044 | { | |
1045 | zap_t *zap; | |
1046 | mzap_ent_t *mze; | |
1047 | const uint64_t *intval = val; | |
1048 | zap_name_t *zn; | |
1049 | int err; | |
1050 | ||
428870ff | 1051 | #ifdef ZFS_DEBUG |
1fde1e37 BB |
1052 | uint64_t oldval; |
1053 | ||
428870ff BB |
1054 | /* |
1055 | * If there is an old value, it shouldn't change across the | |
1056 | * lockdir (eg, due to bprewrite's xlation). | |
1057 | */ | |
1058 | if (integer_size == 8 && num_integers == 1) | |
1059 | (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); | |
1060 | #endif | |
1061 | ||
34dc7c2f BB |
1062 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); |
1063 | if (err) | |
1064 | return (err); | |
1065 | zn = zap_name_alloc(zap, name, MT_EXACT); | |
1066 | if (zn == NULL) { | |
1067 | zap_unlockdir(zap); | |
1068 | return (ENOTSUP); | |
1069 | } | |
1070 | if (!zap->zap_ismicro) { | |
1071 | err = fzap_update(zn, integer_size, num_integers, val, tx); | |
1072 | zap = zn->zn_zap; /* fzap_update() may change zap */ | |
1073 | } else if (integer_size != 8 || num_integers != 1 || | |
1074 | strlen(name) >= MZAP_NAME_LEN) { | |
1075 | dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", | |
1076 | zapobj, integer_size, num_integers, name); | |
428870ff | 1077 | err = mzap_upgrade(&zn->zn_zap, tx, 0); |
34dc7c2f BB |
1078 | if (err == 0) |
1079 | err = fzap_update(zn, integer_size, num_integers, | |
1080 | val, tx); | |
1081 | zap = zn->zn_zap; /* fzap_update() may change zap */ | |
1082 | } else { | |
1083 | mze = mze_find(zn); | |
1084 | if (mze != NULL) { | |
428870ff BB |
1085 | ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); |
1086 | MZE_PHYS(zap, mze)->mze_value = *intval; | |
34dc7c2f BB |
1087 | } else { |
1088 | mzap_addent(zn, *intval); | |
1089 | } | |
1090 | } | |
1091 | ASSERT(zap == zn->zn_zap); | |
1092 | zap_name_free(zn); | |
1093 | if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ | |
1094 | zap_unlockdir(zap); | |
1095 | return (err); | |
1096 | } | |
1097 | ||
428870ff BB |
1098 | int |
1099 | zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1100 | int key_numints, | |
1101 | int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) | |
1102 | { | |
1103 | zap_t *zap; | |
1104 | zap_name_t *zn; | |
1105 | int err; | |
1106 | ||
1107 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); | |
1108 | if (err) | |
1109 | return (err); | |
1110 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
1111 | if (zn == NULL) { | |
1112 | zap_unlockdir(zap); | |
1113 | return (ENOTSUP); | |
1114 | } | |
1115 | err = fzap_update(zn, integer_size, num_integers, val, tx); | |
1116 | zap = zn->zn_zap; /* fzap_update() may change zap */ | |
1117 | zap_name_free(zn); | |
1118 | if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ | |
1119 | zap_unlockdir(zap); | |
1120 | return (err); | |
1121 | } | |
1122 | ||
34dc7c2f BB |
1123 | int |
1124 | zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) | |
1125 | { | |
1126 | return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); | |
1127 | } | |
1128 | ||
1129 | int | |
1130 | zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, | |
1131 | matchtype_t mt, dmu_tx_t *tx) | |
1132 | { | |
1133 | zap_t *zap; | |
1134 | int err; | |
1135 | mzap_ent_t *mze; | |
1136 | zap_name_t *zn; | |
1137 | ||
1138 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); | |
1139 | if (err) | |
1140 | return (err); | |
1141 | zn = zap_name_alloc(zap, name, mt); | |
1142 | if (zn == NULL) { | |
1143 | zap_unlockdir(zap); | |
1144 | return (ENOTSUP); | |
1145 | } | |
1146 | if (!zap->zap_ismicro) { | |
1147 | err = fzap_remove(zn, tx); | |
1148 | } else { | |
1149 | mze = mze_find(zn); | |
1150 | if (mze == NULL) { | |
1151 | err = ENOENT; | |
1152 | } else { | |
1153 | zap->zap_m.zap_num_entries--; | |
1154 | bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid], | |
1155 | sizeof (mzap_ent_phys_t)); | |
1156 | mze_remove(zap, mze); | |
1157 | } | |
1158 | } | |
1159 | zap_name_free(zn); | |
1160 | zap_unlockdir(zap); | |
1161 | return (err); | |
1162 | } | |
1163 | ||
428870ff BB |
1164 | int |
1165 | zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1166 | int key_numints, dmu_tx_t *tx) | |
1167 | { | |
1168 | zap_t *zap; | |
1169 | int err; | |
1170 | zap_name_t *zn; | |
1171 | ||
1172 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); | |
1173 | if (err) | |
1174 | return (err); | |
1175 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
1176 | if (zn == NULL) { | |
1177 | zap_unlockdir(zap); | |
1178 | return (ENOTSUP); | |
1179 | } | |
1180 | err = fzap_remove(zn, tx); | |
1181 | zap_name_free(zn); | |
1182 | zap_unlockdir(zap); | |
1183 | return (err); | |
1184 | } | |
1185 | ||
34dc7c2f BB |
1186 | /* |
1187 | * Routines for iterating over the attributes. | |
1188 | */ | |
1189 | ||
34dc7c2f BB |
1190 | void |
1191 | zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, | |
1192 | uint64_t serialized) | |
1193 | { | |
1194 | zc->zc_objset = os; | |
1195 | zc->zc_zap = NULL; | |
1196 | zc->zc_leaf = NULL; | |
1197 | zc->zc_zapobj = zapobj; | |
428870ff BB |
1198 | zc->zc_serialized = serialized; |
1199 | zc->zc_hash = 0; | |
1200 | zc->zc_cd = 0; | |
34dc7c2f BB |
1201 | } |
1202 | ||
1203 | void | |
1204 | zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) | |
1205 | { | |
1206 | zap_cursor_init_serialized(zc, os, zapobj, 0); | |
1207 | } | |
1208 | ||
1209 | void | |
1210 | zap_cursor_fini(zap_cursor_t *zc) | |
1211 | { | |
1212 | if (zc->zc_zap) { | |
1213 | rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); | |
1214 | zap_unlockdir(zc->zc_zap); | |
1215 | zc->zc_zap = NULL; | |
1216 | } | |
1217 | if (zc->zc_leaf) { | |
1218 | rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); | |
1219 | zap_put_leaf(zc->zc_leaf); | |
1220 | zc->zc_leaf = NULL; | |
1221 | } | |
1222 | zc->zc_objset = NULL; | |
1223 | } | |
1224 | ||
1225 | uint64_t | |
1226 | zap_cursor_serialize(zap_cursor_t *zc) | |
1227 | { | |
1228 | if (zc->zc_hash == -1ULL) | |
1229 | return (-1ULL); | |
428870ff BB |
1230 | if (zc->zc_zap == NULL) |
1231 | return (zc->zc_serialized); | |
1232 | ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); | |
1233 | ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); | |
1234 | ||
1235 | /* | |
1236 | * We want to keep the high 32 bits of the cursor zero if we can, so | |
1237 | * that 32-bit programs can access this. So usually use a small | |
1238 | * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits | |
1239 | * of the cursor. | |
1240 | * | |
1241 | * [ collision differentiator | zap_hashbits()-bit hash value ] | |
1242 | */ | |
1243 | return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | | |
1244 | ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); | |
34dc7c2f BB |
1245 | } |
1246 | ||
1247 | int | |
1248 | zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) | |
1249 | { | |
1250 | int err; | |
1251 | avl_index_t idx; | |
1252 | mzap_ent_t mze_tofind; | |
1253 | mzap_ent_t *mze; | |
1254 | ||
1255 | if (zc->zc_hash == -1ULL) | |
1256 | return (ENOENT); | |
1257 | ||
1258 | if (zc->zc_zap == NULL) { | |
428870ff | 1259 | int hb; |
34dc7c2f BB |
1260 | err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, |
1261 | RW_READER, TRUE, FALSE, &zc->zc_zap); | |
1262 | if (err) | |
1263 | return (err); | |
428870ff BB |
1264 | |
1265 | /* | |
1266 | * To support zap_cursor_init_serialized, advance, retrieve, | |
1267 | * we must add to the existing zc_cd, which may already | |
1268 | * be 1 due to the zap_cursor_advance. | |
1269 | */ | |
1270 | ASSERT(zc->zc_hash == 0); | |
1271 | hb = zap_hashbits(zc->zc_zap); | |
1272 | zc->zc_hash = zc->zc_serialized << (64 - hb); | |
1273 | zc->zc_cd += zc->zc_serialized >> hb; | |
1274 | if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ | |
1275 | zc->zc_cd = 0; | |
34dc7c2f BB |
1276 | } else { |
1277 | rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); | |
1278 | } | |
1279 | if (!zc->zc_zap->zap_ismicro) { | |
1280 | err = fzap_cursor_retrieve(zc->zc_zap, zc, za); | |
1281 | } else { | |
1282 | err = ENOENT; | |
1283 | ||
1284 | mze_tofind.mze_hash = zc->zc_hash; | |
428870ff | 1285 | mze_tofind.mze_cd = zc->zc_cd; |
34dc7c2f BB |
1286 | |
1287 | mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); | |
1288 | if (mze == NULL) { | |
1289 | mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, | |
1290 | idx, AVL_AFTER); | |
1291 | } | |
1292 | if (mze) { | |
428870ff BB |
1293 | mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); |
1294 | ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); | |
34dc7c2f BB |
1295 | za->za_normalization_conflict = |
1296 | mzap_normalization_conflict(zc->zc_zap, NULL, mze); | |
1297 | za->za_integer_length = 8; | |
1298 | za->za_num_integers = 1; | |
428870ff BB |
1299 | za->za_first_integer = mzep->mze_value; |
1300 | (void) strcpy(za->za_name, mzep->mze_name); | |
34dc7c2f | 1301 | zc->zc_hash = mze->mze_hash; |
428870ff | 1302 | zc->zc_cd = mze->mze_cd; |
34dc7c2f BB |
1303 | err = 0; |
1304 | } else { | |
1305 | zc->zc_hash = -1ULL; | |
1306 | } | |
1307 | } | |
1308 | rw_exit(&zc->zc_zap->zap_rwlock); | |
1309 | return (err); | |
1310 | } | |
1311 | ||
1312 | void | |
1313 | zap_cursor_advance(zap_cursor_t *zc) | |
1314 | { | |
1315 | if (zc->zc_hash == -1ULL) | |
1316 | return; | |
1317 | zc->zc_cd++; | |
428870ff BB |
1318 | } |
1319 | ||
1320 | int | |
1321 | zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt) | |
1322 | { | |
1323 | int err = 0; | |
1324 | mzap_ent_t *mze; | |
1325 | zap_name_t *zn; | |
1326 | ||
1327 | if (zc->zc_zap == NULL) { | |
1328 | err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, | |
1329 | RW_READER, TRUE, FALSE, &zc->zc_zap); | |
1330 | if (err) | |
1331 | return (err); | |
1332 | } else { | |
1333 | rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); | |
34dc7c2f | 1334 | } |
428870ff BB |
1335 | |
1336 | zn = zap_name_alloc(zc->zc_zap, name, mt); | |
1337 | if (zn == NULL) { | |
1338 | rw_exit(&zc->zc_zap->zap_rwlock); | |
1339 | return (ENOTSUP); | |
1340 | } | |
1341 | ||
1342 | if (!zc->zc_zap->zap_ismicro) { | |
1343 | err = fzap_cursor_move_to_key(zc, zn); | |
1344 | } else { | |
1345 | mze = mze_find(zn); | |
1346 | if (mze == NULL) { | |
1347 | err = ENOENT; | |
1348 | goto out; | |
1349 | } | |
1350 | zc->zc_hash = mze->mze_hash; | |
1351 | zc->zc_cd = mze->mze_cd; | |
1352 | } | |
1353 | ||
1354 | out: | |
1355 | zap_name_free(zn); | |
1356 | rw_exit(&zc->zc_zap->zap_rwlock); | |
1357 | return (err); | |
34dc7c2f BB |
1358 | } |
1359 | ||
1360 | int | |
1361 | zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) | |
1362 | { | |
1363 | int err; | |
1364 | zap_t *zap; | |
1365 | ||
1366 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
1367 | if (err) | |
1368 | return (err); | |
1369 | ||
1370 | bzero(zs, sizeof (zap_stats_t)); | |
1371 | ||
1372 | if (zap->zap_ismicro) { | |
1373 | zs->zs_blocksize = zap->zap_dbuf->db_size; | |
1374 | zs->zs_num_entries = zap->zap_m.zap_num_entries; | |
1375 | zs->zs_num_blocks = 1; | |
1376 | } else { | |
1377 | fzap_get_stats(zap, zs); | |
1378 | } | |
1379 | zap_unlockdir(zap); | |
1380 | return (0); | |
1381 | } | |
9babb374 BB |
1382 | |
1383 | int | |
1384 | zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, | |
45d1cae3 | 1385 | uint64_t *towrite, uint64_t *tooverwrite) |
9babb374 BB |
1386 | { |
1387 | zap_t *zap; | |
1388 | int err = 0; | |
1389 | ||
1390 | ||
1391 | /* | |
1392 | * Since, we don't have a name, we cannot figure out which blocks will | |
1393 | * be affected in this operation. So, account for the worst case : | |
1394 | * - 3 blocks overwritten: target leaf, ptrtbl block, header block | |
1395 | * - 4 new blocks written if adding: | |
1396 | * - 2 blocks for possibly split leaves, | |
1397 | * - 2 grown ptrtbl blocks | |
1398 | * | |
1399 | * This also accomodates the case where an add operation to a fairly | |
1400 | * large microzap results in a promotion to fatzap. | |
1401 | */ | |
1402 | if (name == NULL) { | |
1403 | *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; | |
1404 | return (err); | |
1405 | } | |
1406 | ||
1407 | /* | |
330d06f9 | 1408 | * We lock the zap with adding == FALSE. Because, if we pass |
9babb374 BB |
1409 | * the actual value of add, it could trigger a mzap_upgrade(). |
1410 | * At present we are just evaluating the possibility of this operation | |
1411 | * and hence we donot want to trigger an upgrade. | |
1412 | */ | |
1413 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
1414 | if (err) | |
1415 | return (err); | |
1416 | ||
1417 | if (!zap->zap_ismicro) { | |
1418 | zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); | |
1419 | if (zn) { | |
1420 | err = fzap_count_write(zn, add, towrite, | |
1421 | tooverwrite); | |
1422 | zap_name_free(zn); | |
1423 | } else { | |
1424 | /* | |
1425 | * We treat this case as similar to (name == NULL) | |
1426 | */ | |
1427 | *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; | |
1428 | } | |
1429 | } else { | |
45d1cae3 BB |
1430 | /* |
1431 | * We are here if (name != NULL) and this is a micro-zap. | |
1432 | * We account for the header block depending on whether it | |
1433 | * is freeable. | |
1434 | * | |
1435 | * Incase of an add-operation it is hard to find out | |
1436 | * if this add will promote this microzap to fatzap. | |
1437 | * Hence, we consider the worst case and account for the | |
1438 | * blocks assuming this microzap would be promoted to a | |
1439 | * fatzap. | |
1440 | * | |
1441 | * 1 block overwritten : header block | |
1442 | * 4 new blocks written : 2 new split leaf, 2 grown | |
1443 | * ptrtbl blocks | |
1444 | */ | |
1445 | if (dmu_buf_freeable(zap->zap_dbuf)) | |
1446 | *tooverwrite += SPA_MAXBLOCKSIZE; | |
1447 | else | |
1448 | *towrite += SPA_MAXBLOCKSIZE; | |
1449 | ||
1450 | if (add) { | |
1451 | *towrite += 4 * SPA_MAXBLOCKSIZE; | |
9babb374 BB |
1452 | } |
1453 | } | |
1454 | ||
1455 | zap_unlockdir(zap); | |
1456 | return (err); | |
1457 | } | |
c28b2279 BB |
1458 | |
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Object creation and destruction. */
EXPORT_SYMBOL(zap_create);
EXPORT_SYMBOL(zap_create_norm);
EXPORT_SYMBOL(zap_create_flags);
EXPORT_SYMBOL(zap_create_claim);
EXPORT_SYMBOL(zap_create_claim_norm);
EXPORT_SYMBOL(zap_destroy);

/* Lookup, prefetch and write accounting. */
EXPORT_SYMBOL(zap_lookup);
EXPORT_SYMBOL(zap_lookup_norm);
EXPORT_SYMBOL(zap_lookup_uint64);
EXPORT_SYMBOL(zap_contains);
EXPORT_SYMBOL(zap_prefetch_uint64);
EXPORT_SYMBOL(zap_count_write);

/* Add, update, length and remove. */
EXPORT_SYMBOL(zap_add);
EXPORT_SYMBOL(zap_add_uint64);
EXPORT_SYMBOL(zap_update);
EXPORT_SYMBOL(zap_update_uint64);
EXPORT_SYMBOL(zap_length);
EXPORT_SYMBOL(zap_length_uint64);
EXPORT_SYMBOL(zap_remove);
EXPORT_SYMBOL(zap_remove_norm);
EXPORT_SYMBOL(zap_remove_uint64);

/* Counting, searching, joining and integer-keyed helpers. */
EXPORT_SYMBOL(zap_count);
EXPORT_SYMBOL(zap_value_search);
EXPORT_SYMBOL(zap_join);
EXPORT_SYMBOL(zap_join_increment);
EXPORT_SYMBOL(zap_add_int);
EXPORT_SYMBOL(zap_remove_int);
EXPORT_SYMBOL(zap_lookup_int);
EXPORT_SYMBOL(zap_increment_int);
EXPORT_SYMBOL(zap_add_int_key);
EXPORT_SYMBOL(zap_lookup_int_key);
EXPORT_SYMBOL(zap_increment);

/* Cursor iteration. */
EXPORT_SYMBOL(zap_cursor_init);
EXPORT_SYMBOL(zap_cursor_fini);
EXPORT_SYMBOL(zap_cursor_retrieve);
EXPORT_SYMBOL(zap_cursor_advance);
EXPORT_SYMBOL(zap_cursor_serialize);
EXPORT_SYMBOL(zap_cursor_move_to_key);
EXPORT_SYMBOL(zap_cursor_init_serialized);

/* Statistics. */
EXPORT_SYMBOL(zap_get_stats);
#endif