]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
9 | * or http://www.opensolaris.org/os/licensing. | |
10 | * See the License for the specific language governing permissions | |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
21 | /* | |
428870ff | 22 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
9bd274dd | 23 | * Copyright (c) 2011, 2014 by Delphix. All rights reserved. |
0c66c32d | 24 | * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. |
34dc7c2f BB |
25 | */ |
26 | ||
428870ff | 27 | #include <sys/zio.h> |
34dc7c2f BB |
28 | #include <sys/spa.h> |
29 | #include <sys/dmu.h> | |
30 | #include <sys/zfs_context.h> | |
31 | #include <sys/zap.h> | |
32 | #include <sys/refcount.h> | |
33 | #include <sys/zap_impl.h> | |
34 | #include <sys/zap_leaf.h> | |
35 | #include <sys/avl.h> | |
428870ff | 36 | #include <sys/arc.h> |
34dc7c2f BB |
37 | |
38 | #ifdef _KERNEL | |
39 | #include <sys/sunddi.h> | |
40 | #endif | |
41 | ||
d683ddbb JG |
42 | extern inline mzap_phys_t *zap_m_phys(zap_t *zap); |
43 | ||
428870ff | 44 | static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags); |
34dc7c2f | 45 | |
428870ff BB |
46 | uint64_t |
47 | zap_getflags(zap_t *zap) | |
48 | { | |
49 | if (zap->zap_ismicro) | |
50 | return (0); | |
d683ddbb | 51 | return (zap_f_phys(zap)->zap_flags); |
428870ff | 52 | } |
34dc7c2f | 53 | |
428870ff BB |
54 | int |
55 | zap_hashbits(zap_t *zap) | |
34dc7c2f | 56 | { |
428870ff BB |
57 | if (zap_getflags(zap) & ZAP_FLAG_HASH64) |
58 | return (48); | |
59 | else | |
60 | return (28); | |
61 | } | |
34dc7c2f | 62 | |
428870ff BB |
63 | uint32_t |
64 | zap_maxcd(zap_t *zap) | |
65 | { | |
66 | if (zap_getflags(zap) & ZAP_FLAG_HASH64) | |
67 | return ((1<<16)-1); | |
68 | else | |
69 | return (-1U); | |
70 | } | |
34dc7c2f | 71 | |
428870ff BB |
72 | static uint64_t |
73 | zap_hash(zap_name_t *zn) | |
74 | { | |
75 | zap_t *zap = zn->zn_zap; | |
76 | uint64_t h = 0; | |
34dc7c2f | 77 | |
428870ff BB |
78 | if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { |
79 | ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); | |
80 | h = *(uint64_t *)zn->zn_key_orig; | |
81 | } else { | |
82 | h = zap->zap_salt; | |
83 | ASSERT(h != 0); | |
84 | ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); | |
85 | ||
86 | if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { | |
87 | int i; | |
88 | const uint64_t *wp = zn->zn_key_norm; | |
89 | ||
90 | ASSERT(zn->zn_key_intlen == 8); | |
91 | for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) { | |
92 | int j; | |
93 | uint64_t word = *wp; | |
94 | ||
95 | for (j = 0; j < zn->zn_key_intlen; j++) { | |
96 | h = (h >> 8) ^ | |
97 | zfs_crc64_table[(h ^ word) & 0xFF]; | |
98 | word >>= NBBY; | |
99 | } | |
100 | } | |
101 | } else { | |
102 | int i, len; | |
103 | const uint8_t *cp = zn->zn_key_norm; | |
104 | ||
105 | /* | |
106 | * We previously stored the terminating null on | |
107 | * disk, but didn't hash it, so we need to | |
108 | * continue to not hash it. (The | |
109 | * zn_key_*_numints includes the terminating | |
110 | * null for non-binary keys.) | |
111 | */ | |
112 | len = zn->zn_key_norm_numints - 1; | |
113 | ||
114 | ASSERT(zn->zn_key_intlen == 1); | |
115 | for (i = 0; i < len; cp++, i++) { | |
116 | h = (h >> 8) ^ | |
117 | zfs_crc64_table[(h ^ *cp) & 0xFF]; | |
118 | } | |
119 | } | |
120 | } | |
34dc7c2f | 121 | /* |
428870ff BB |
122 | * Don't use all 64 bits, since we need some in the cookie for |
123 | * the collision differentiator. We MUST use the high bits, | |
124 | * since those are the ones that we first pay attention to when | |
34dc7c2f BB |
125 | * chosing the bucket. |
126 | */ | |
428870ff | 127 | h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); |
34dc7c2f | 128 | |
428870ff | 129 | return (h); |
34dc7c2f BB |
130 | } |
131 | ||
132 | static int | |
133 | zap_normalize(zap_t *zap, const char *name, char *namenorm) | |
134 | { | |
135 | size_t inlen, outlen; | |
136 | int err; | |
137 | ||
428870ff BB |
138 | ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); |
139 | ||
34dc7c2f BB |
140 | inlen = strlen(name) + 1; |
141 | outlen = ZAP_MAXNAMELEN; | |
142 | ||
143 | err = 0; | |
144 | (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, | |
9babb374 BB |
145 | zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL | |
146 | U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err); | |
34dc7c2f BB |
147 | |
148 | return (err); | |
149 | } | |
150 | ||
151 | boolean_t | |
152 | zap_match(zap_name_t *zn, const char *matchname) | |
153 | { | |
428870ff BB |
154 | ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); |
155 | ||
34dc7c2f BB |
156 | if (zn->zn_matchtype == MT_FIRST) { |
157 | char norm[ZAP_MAXNAMELEN]; | |
158 | ||
159 | if (zap_normalize(zn->zn_zap, matchname, norm) != 0) | |
160 | return (B_FALSE); | |
161 | ||
428870ff | 162 | return (strcmp(zn->zn_key_norm, norm) == 0); |
34dc7c2f BB |
163 | } else { |
164 | /* MT_BEST or MT_EXACT */ | |
428870ff | 165 | return (strcmp(zn->zn_key_orig, matchname) == 0); |
34dc7c2f BB |
166 | } |
167 | } | |
168 | ||
169 | void | |
170 | zap_name_free(zap_name_t *zn) | |
171 | { | |
172 | kmem_free(zn, sizeof (zap_name_t)); | |
173 | } | |
174 | ||
34dc7c2f | 175 | zap_name_t * |
428870ff | 176 | zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) |
34dc7c2f | 177 | { |
79c76d5b | 178 | zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); |
34dc7c2f BB |
179 | |
180 | zn->zn_zap = zap; | |
428870ff BB |
181 | zn->zn_key_intlen = sizeof (*key); |
182 | zn->zn_key_orig = key; | |
183 | zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; | |
34dc7c2f BB |
184 | zn->zn_matchtype = mt; |
185 | if (zap->zap_normflags) { | |
428870ff | 186 | if (zap_normalize(zap, key, zn->zn_normbuf) != 0) { |
34dc7c2f BB |
187 | zap_name_free(zn); |
188 | return (NULL); | |
189 | } | |
428870ff BB |
190 | zn->zn_key_norm = zn->zn_normbuf; |
191 | zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; | |
34dc7c2f BB |
192 | } else { |
193 | if (mt != MT_EXACT) { | |
194 | zap_name_free(zn); | |
195 | return (NULL); | |
196 | } | |
428870ff BB |
197 | zn->zn_key_norm = zn->zn_key_orig; |
198 | zn->zn_key_norm_numints = zn->zn_key_orig_numints; | |
34dc7c2f BB |
199 | } |
200 | ||
428870ff BB |
201 | zn->zn_hash = zap_hash(zn); |
202 | return (zn); | |
203 | } | |
204 | ||
205 | zap_name_t * | |
206 | zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) | |
207 | { | |
79c76d5b | 208 | zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); |
428870ff BB |
209 | |
210 | ASSERT(zap->zap_normflags == 0); | |
211 | zn->zn_zap = zap; | |
212 | zn->zn_key_intlen = sizeof (*key); | |
213 | zn->zn_key_orig = zn->zn_key_norm = key; | |
214 | zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; | |
215 | zn->zn_matchtype = MT_EXACT; | |
216 | ||
217 | zn->zn_hash = zap_hash(zn); | |
34dc7c2f BB |
218 | return (zn); |
219 | } | |
220 | ||
221 | static void | |
222 | mzap_byteswap(mzap_phys_t *buf, size_t size) | |
223 | { | |
224 | int i, max; | |
225 | buf->mz_block_type = BSWAP_64(buf->mz_block_type); | |
226 | buf->mz_salt = BSWAP_64(buf->mz_salt); | |
227 | buf->mz_normflags = BSWAP_64(buf->mz_normflags); | |
228 | max = (size / MZAP_ENT_LEN) - 1; | |
229 | for (i = 0; i < max; i++) { | |
230 | buf->mz_chunk[i].mze_value = | |
231 | BSWAP_64(buf->mz_chunk[i].mze_value); | |
232 | buf->mz_chunk[i].mze_cd = | |
233 | BSWAP_32(buf->mz_chunk[i].mze_cd); | |
234 | } | |
235 | } | |
236 | ||
237 | void | |
238 | zap_byteswap(void *buf, size_t size) | |
239 | { | |
240 | uint64_t block_type; | |
241 | ||
242 | block_type = *(uint64_t *)buf; | |
243 | ||
244 | if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { | |
245 | /* ASSERT(magic == ZAP_LEAF_MAGIC); */ | |
246 | mzap_byteswap(buf, size); | |
247 | } else { | |
248 | fzap_byteswap(buf, size); | |
249 | } | |
250 | } | |
251 | ||
252 | static int | |
253 | mze_compare(const void *arg1, const void *arg2) | |
254 | { | |
255 | const mzap_ent_t *mze1 = arg1; | |
256 | const mzap_ent_t *mze2 = arg2; | |
257 | ||
258 | if (mze1->mze_hash > mze2->mze_hash) | |
259 | return (+1); | |
260 | if (mze1->mze_hash < mze2->mze_hash) | |
261 | return (-1); | |
428870ff | 262 | if (mze1->mze_cd > mze2->mze_cd) |
34dc7c2f | 263 | return (+1); |
428870ff | 264 | if (mze1->mze_cd < mze2->mze_cd) |
34dc7c2f BB |
265 | return (-1); |
266 | return (0); | |
267 | } | |
268 | ||
269 | static void | |
428870ff | 270 | mze_insert(zap_t *zap, int chunkid, uint64_t hash) |
34dc7c2f BB |
271 | { |
272 | mzap_ent_t *mze; | |
273 | ||
274 | ASSERT(zap->zap_ismicro); | |
275 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); | |
34dc7c2f | 276 | |
79c76d5b | 277 | mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); |
34dc7c2f BB |
278 | mze->mze_chunkid = chunkid; |
279 | mze->mze_hash = hash; | |
428870ff BB |
280 | mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; |
281 | ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); | |
34dc7c2f BB |
282 | avl_add(&zap->zap_m.zap_avl, mze); |
283 | } | |
284 | ||
285 | static mzap_ent_t * | |
286 | mze_find(zap_name_t *zn) | |
287 | { | |
288 | mzap_ent_t mze_tofind; | |
289 | mzap_ent_t *mze; | |
290 | avl_index_t idx; | |
291 | avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; | |
292 | ||
293 | ASSERT(zn->zn_zap->zap_ismicro); | |
294 | ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); | |
295 | ||
34dc7c2f | 296 | mze_tofind.mze_hash = zn->zn_hash; |
428870ff | 297 | mze_tofind.mze_cd = 0; |
34dc7c2f BB |
298 | |
299 | again: | |
300 | mze = avl_find(avl, &mze_tofind, &idx); | |
301 | if (mze == NULL) | |
302 | mze = avl_nearest(avl, idx, AVL_AFTER); | |
303 | for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { | |
428870ff BB |
304 | ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); |
305 | if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) | |
34dc7c2f BB |
306 | return (mze); |
307 | } | |
308 | if (zn->zn_matchtype == MT_BEST) { | |
309 | zn->zn_matchtype = MT_FIRST; | |
310 | goto again; | |
311 | } | |
312 | return (NULL); | |
313 | } | |
314 | ||
315 | static uint32_t | |
316 | mze_find_unused_cd(zap_t *zap, uint64_t hash) | |
317 | { | |
318 | mzap_ent_t mze_tofind; | |
319 | mzap_ent_t *mze; | |
320 | avl_index_t idx; | |
321 | avl_tree_t *avl = &zap->zap_m.zap_avl; | |
322 | uint32_t cd; | |
323 | ||
324 | ASSERT(zap->zap_ismicro); | |
325 | ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); | |
326 | ||
327 | mze_tofind.mze_hash = hash; | |
428870ff | 328 | mze_tofind.mze_cd = 0; |
34dc7c2f BB |
329 | |
330 | cd = 0; | |
331 | for (mze = avl_find(avl, &mze_tofind, &idx); | |
332 | mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { | |
428870ff | 333 | if (mze->mze_cd != cd) |
34dc7c2f BB |
334 | break; |
335 | cd++; | |
336 | } | |
337 | ||
338 | return (cd); | |
339 | } | |
340 | ||
341 | static void | |
342 | mze_remove(zap_t *zap, mzap_ent_t *mze) | |
343 | { | |
344 | ASSERT(zap->zap_ismicro); | |
345 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); | |
346 | ||
347 | avl_remove(&zap->zap_m.zap_avl, mze); | |
348 | kmem_free(mze, sizeof (mzap_ent_t)); | |
349 | } | |
350 | ||
351 | static void | |
352 | mze_destroy(zap_t *zap) | |
353 | { | |
354 | mzap_ent_t *mze; | |
355 | void *avlcookie = NULL; | |
356 | ||
c65aa5b2 | 357 | while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))) |
34dc7c2f BB |
358 | kmem_free(mze, sizeof (mzap_ent_t)); |
359 | avl_destroy(&zap->zap_m.zap_avl); | |
360 | } | |
361 | ||
/*
 * Build the in-core zap_t for object `obj' of `os', whose first block is
 * held in db, and attach it to that dbuf as its user data.
 *
 * The new zap_t is published with dmu_buf_set_user(); if that call hands
 * back a zap_t that is already attached, the freshly built one is torn
 * down and the existing one is returned instead.  Otherwise the salt and
 * normalization flags are cached from the block and, for a microzap,
 * the AVL tree of in-core entries is populated.  The new zap's rwlock is
 * held as writer during this setup and released before returning.
 */
static zap_t *
mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
{
	zap_t *winner;
	zap_t *zap;
	int i;

	ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));

	zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
	rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL);
	rw_enter(&zap->zap_rwlock, RW_WRITER);
	zap->zap_objset = os;
	zap->zap_object = obj;
	zap->zap_dbuf = db;

	/* The first 64-bit word of the block tells micro from fat. */
	if (*(uint64_t *)db->db_data != ZBT_MICRO) {
		mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
		zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
	} else {
		zap->zap_ismicro = TRUE;
	}

	/*
	 * Make sure that zap_ismicro is set before we let others see
	 * it, because zap_lockdir() checks zap_ismicro without the lock
	 * held.
	 */
	dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
	winner = dmu_buf_set_user(db, &zap->zap_dbu);

	if (winner != NULL) {
		/* A zap_t is already attached; discard ours and use it. */
		rw_exit(&zap->zap_rwlock);
		rw_destroy(&zap->zap_rwlock);
		if (!zap->zap_ismicro)
			mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
		kmem_free(zap, sizeof (zap_t));
		return (winner);
	}

	if (zap->zap_ismicro) {
		zap->zap_salt = zap_m_phys(zap)->mz_salt;
		zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
		/* One MZAP_ENT_LEN-sized slot is not counted as a chunk. */
		zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
		avl_create(&zap->zap_m.zap_avl, mze_compare,
		    sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));

		/* Index every chunk in use (non-empty name) in the AVL. */
		for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
			mzap_ent_phys_t *mze =
			    &zap_m_phys(zap)->mz_chunk[i];
			if (mze->mze_name[0]) {
				zap_name_t *zn;

				zap->zap_m.zap_num_entries++;
				/* The name is only needed to get its hash. */
				zn = zap_name_alloc(zap, mze->mze_name,
				    MT_EXACT);
				mze_insert(zap, i, zn->zn_hash);
				zap_name_free(zn);
			}
		}
	} else {
		zap->zap_salt = zap_f_phys(zap)->zap_salt;
		zap->zap_normflags = zap_f_phys(zap)->zap_normflags;

		ASSERT3U(sizeof (struct zap_leaf_header), ==,
		    2*ZAP_LEAF_CHUNKSIZE);

		/*
		 * The embedded pointer table should not overlap the
		 * other members.
		 */
		ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
		    &zap_f_phys(zap)->zap_salt);

		/*
		 * The embedded pointer table should end at the end of
		 * the block
		 */
		ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
		    1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
		    (uintptr_t)zap_f_phys(zap), ==,
		    zap->zap_dbuf->db_size);
	}
	rw_exit(&zap->zap_rwlock);
	return (zap);
}
448 | ||
449 | int | |
450 | zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, | |
451 | krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) | |
452 | { | |
ceb49b0a | 453 | dmu_object_info_t doi; |
34dc7c2f BB |
454 | zap_t *zap; |
455 | dmu_buf_t *db; | |
456 | krw_t lt; | |
457 | int err; | |
458 | ||
459 | *zapp = NULL; | |
460 | ||
428870ff | 461 | err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH); |
34dc7c2f BB |
462 | if (err) |
463 | return (err); | |
464 | ||
ceb49b0a BB |
465 | dmu_object_info_from_db(db, &doi); |
466 | if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP) | |
467 | return (SET_ERROR(EINVAL)); | |
34dc7c2f BB |
468 | |
469 | zap = dmu_buf_get_user(db); | |
470 | if (zap == NULL) | |
471 | zap = mzap_open(os, obj, db); | |
472 | ||
473 | /* | |
474 | * We're checking zap_ismicro without the lock held, in order to | |
475 | * tell what type of lock we want. Once we have some sort of | |
476 | * lock, see if it really is the right type. In practice this | |
477 | * can only be different if it was upgraded from micro to fat, | |
478 | * and micro wanted WRITER but fat only needs READER. | |
479 | */ | |
480 | lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; | |
481 | rw_enter(&zap->zap_rwlock, lt); | |
482 | if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { | |
483 | /* it was upgraded, now we only need reader */ | |
484 | ASSERT(lt == RW_WRITER); | |
485 | ASSERT(RW_READER == | |
486 | (!zap->zap_ismicro && fatreader) ? RW_READER : lti); | |
487 | rw_downgrade(&zap->zap_rwlock); | |
488 | lt = RW_READER; | |
489 | } | |
490 | ||
491 | zap->zap_objset = os; | |
492 | ||
493 | if (lt == RW_WRITER) | |
494 | dmu_buf_will_dirty(db, tx); | |
495 | ||
496 | ASSERT3P(zap->zap_dbuf, ==, db); | |
497 | ||
498 | ASSERT(!zap->zap_ismicro || | |
499 | zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); | |
500 | if (zap->zap_ismicro && tx && adding && | |
501 | zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { | |
502 | uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; | |
503 | if (newsz > MZAP_MAX_BLKSZ) { | |
504 | dprintf("upgrading obj %llu: num_entries=%u\n", | |
505 | obj, zap->zap_m.zap_num_entries); | |
506 | *zapp = zap; | |
428870ff | 507 | return (mzap_upgrade(zapp, tx, 0)); |
34dc7c2f BB |
508 | } |
509 | err = dmu_object_set_blocksize(os, obj, newsz, 0, tx); | |
c99c9001 | 510 | ASSERT0(err); |
34dc7c2f BB |
511 | zap->zap_m.zap_num_chunks = |
512 | db->db_size / MZAP_ENT_LEN - 1; | |
513 | } | |
514 | ||
515 | *zapp = zap; | |
516 | return (0); | |
517 | } | |
518 | ||
519 | void | |
520 | zap_unlockdir(zap_t *zap) | |
521 | { | |
522 | rw_exit(&zap->zap_rwlock); | |
523 | dmu_buf_rele(zap->zap_dbuf, NULL); | |
524 | } | |
525 | ||
/*
 * Convert the microzap *zapp into a fat zap within tx.
 *
 * The microzap block is copied aside, the object's block size is
 * changed to the fat zap default (only when flags is 0), the in-core
 * AVL tree is destroyed, fzap_upgrade() is run, and every entry of the
 * saved copy is re-added with its original collision differentiator.
 *
 * The zap must be locked as writer.  *zapp is rewritten on the way out
 * because fzap_add_cd() may change the zap.  Returns 0 or the first
 * error from dmu_object_set_blocksize() or fzap_add_cd().
 */
static int
mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
{
	mzap_phys_t *mzp;
	int i, sz, nchunks;
	int err = 0;
	zap_t *zap = *zapp;

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));

	/* Snapshot the microzap block; entries are re-added from it below. */
	sz = zap->zap_dbuf->db_size;
	mzp = zio_buf_alloc(sz);
	bcopy(zap->zap_dbuf->db_data, mzp, sz);
	nchunks = zap->zap_m.zap_num_chunks;

	if (!flags) {
		err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
		    1ULL << fzap_default_block_shift, 0, tx);
		if (err) {
			zio_buf_free(mzp, sz);
			return (err);
		}
	}

	dprintf("upgrading obj=%llu with %u chunks\n",
	    zap->zap_object, nchunks);
	/* XXX destroy the avl later, so we can use the stored hash value */
	mze_destroy(zap);

	fzap_upgrade(zap, tx, flags);

	for (i = 0; i < nchunks; i++) {
		mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
		zap_name_t *zn;
		/* An empty name marks an unused chunk. */
		if (mze->mze_name[0] == 0)
			continue;
		dprintf("adding %s=%llu\n",
		    mze->mze_name, mze->mze_value);
		zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
		/* Microzap values are a single 8-byte integer. */
		err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx);
		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
		zap_name_free(zn);
		if (err)
			break;
	}
	zio_buf_free(mzp, sz);
	*zapp = zap;
	return (err);
}
575 | ||
fa86b5db | 576 | void |
428870ff BB |
577 | mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags, |
578 | dmu_tx_t *tx) | |
34dc7c2f BB |
579 | { |
580 | dmu_buf_t *db; | |
581 | mzap_phys_t *zp; | |
582 | ||
428870ff | 583 | VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); |
34dc7c2f BB |
584 | |
585 | #ifdef ZFS_DEBUG | |
586 | { | |
587 | dmu_object_info_t doi; | |
588 | dmu_object_info_from_db(db, &doi); | |
9ae529ec | 589 | ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); |
34dc7c2f BB |
590 | } |
591 | #endif | |
592 | ||
593 | dmu_buf_will_dirty(db, tx); | |
594 | zp = db->db_data; | |
595 | zp->mz_block_type = ZBT_MICRO; | |
596 | zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL; | |
597 | zp->mz_normflags = normflags; | |
598 | dmu_buf_rele(db, FTAG); | |
428870ff BB |
599 | |
600 | if (flags != 0) { | |
601 | zap_t *zap; | |
602 | /* Only fat zap supports flags; upgrade immediately. */ | |
603 | VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER, | |
604 | B_FALSE, B_FALSE, &zap)); | |
605 | VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags)); | |
606 | zap_unlockdir(zap); | |
607 | } | |
34dc7c2f BB |
608 | } |
609 | ||
610 | int | |
611 | zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, | |
612 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
613 | { | |
614 | return (zap_create_claim_norm(os, obj, | |
615 | 0, ot, bonustype, bonuslen, tx)); | |
616 | } | |
617 | ||
618 | int | |
619 | zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, | |
620 | dmu_object_type_t ot, | |
621 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
622 | { | |
623 | int err; | |
624 | ||
625 | err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx); | |
626 | if (err != 0) | |
627 | return (err); | |
428870ff | 628 | mzap_create_impl(os, obj, normflags, 0, tx); |
34dc7c2f BB |
629 | return (0); |
630 | } | |
631 | ||
632 | uint64_t | |
633 | zap_create(objset_t *os, dmu_object_type_t ot, | |
634 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
635 | { | |
636 | return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); | |
637 | } | |
638 | ||
639 | uint64_t | |
640 | zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, | |
641 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
642 | { | |
643 | uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); | |
644 | ||
428870ff BB |
645 | mzap_create_impl(os, obj, normflags, 0, tx); |
646 | return (obj); | |
647 | } | |
648 | ||
649 | uint64_t | |
650 | zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, | |
651 | dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, | |
652 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
653 | { | |
654 | uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx); | |
655 | ||
656 | ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT && | |
657 | leaf_blockshift <= SPA_MAXBLOCKSHIFT && | |
658 | indirect_blockshift >= SPA_MINBLOCKSHIFT && | |
659 | indirect_blockshift <= SPA_MAXBLOCKSHIFT); | |
660 | ||
661 | VERIFY(dmu_object_set_blocksize(os, obj, | |
662 | 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0); | |
663 | ||
664 | mzap_create_impl(os, obj, normflags, flags, tx); | |
34dc7c2f BB |
665 | return (obj); |
666 | } | |
667 | ||
668 | int | |
669 | zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) | |
670 | { | |
671 | /* | |
672 | * dmu_object_free will free the object number and free the | |
673 | * data. Freeing the data will cause our pageout function to be | |
674 | * called, which will destroy our data (zap_leaf_t's and zap_t). | |
675 | */ | |
676 | ||
677 | return (dmu_object_free(os, zapobj, tx)); | |
678 | } | |
679 | ||
34dc7c2f | 680 | void |
0c66c32d | 681 | zap_evict(void *dbu) |
34dc7c2f | 682 | { |
0c66c32d | 683 | zap_t *zap = dbu; |
34dc7c2f BB |
684 | |
685 | rw_destroy(&zap->zap_rwlock); | |
686 | ||
687 | if (zap->zap_ismicro) | |
688 | mze_destroy(zap); | |
689 | else | |
690 | mutex_destroy(&zap->zap_f.zap_num_entries_mtx); | |
691 | ||
692 | kmem_free(zap, sizeof (zap_t)); | |
693 | } | |
694 | ||
695 | int | |
696 | zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) | |
697 | { | |
698 | zap_t *zap; | |
699 | int err; | |
700 | ||
701 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
702 | if (err) | |
703 | return (err); | |
704 | if (!zap->zap_ismicro) { | |
705 | err = fzap_count(zap, count); | |
706 | } else { | |
707 | *count = zap->zap_m.zap_num_entries; | |
708 | } | |
709 | zap_unlockdir(zap); | |
710 | return (err); | |
711 | } | |
712 | ||
713 | /* | |
714 | * zn may be NULL; if not specified, it will be computed if needed. | |
715 | * See also the comment above zap_entry_normalization_conflict(). | |
716 | */ | |
717 | static boolean_t | |
718 | mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) | |
719 | { | |
720 | mzap_ent_t *other; | |
721 | int direction = AVL_BEFORE; | |
722 | boolean_t allocdzn = B_FALSE; | |
723 | ||
724 | if (zap->zap_normflags == 0) | |
725 | return (B_FALSE); | |
726 | ||
727 | again: | |
728 | for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction); | |
729 | other && other->mze_hash == mze->mze_hash; | |
730 | other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { | |
731 | ||
732 | if (zn == NULL) { | |
428870ff | 733 | zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, |
34dc7c2f BB |
734 | MT_FIRST); |
735 | allocdzn = B_TRUE; | |
736 | } | |
428870ff | 737 | if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { |
34dc7c2f BB |
738 | if (allocdzn) |
739 | zap_name_free(zn); | |
740 | return (B_TRUE); | |
741 | } | |
742 | } | |
743 | ||
744 | if (direction == AVL_BEFORE) { | |
745 | direction = AVL_AFTER; | |
746 | goto again; | |
747 | } | |
748 | ||
749 | if (allocdzn) | |
750 | zap_name_free(zn); | |
751 | return (B_FALSE); | |
752 | } | |
753 | ||
754 | /* | |
755 | * Routines for manipulating attributes. | |
756 | */ | |
757 | ||
758 | int | |
759 | zap_lookup(objset_t *os, uint64_t zapobj, const char *name, | |
760 | uint64_t integer_size, uint64_t num_integers, void *buf) | |
761 | { | |
762 | return (zap_lookup_norm(os, zapobj, name, integer_size, | |
763 | num_integers, buf, MT_EXACT, NULL, 0, NULL)); | |
764 | } | |
765 | ||
766 | int | |
767 | zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, | |
768 | uint64_t integer_size, uint64_t num_integers, void *buf, | |
769 | matchtype_t mt, char *realname, int rn_len, | |
770 | boolean_t *ncp) | |
771 | { | |
772 | zap_t *zap; | |
773 | int err; | |
774 | mzap_ent_t *mze; | |
775 | zap_name_t *zn; | |
776 | ||
777 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
778 | if (err) | |
779 | return (err); | |
780 | zn = zap_name_alloc(zap, name, mt); | |
781 | if (zn == NULL) { | |
782 | zap_unlockdir(zap); | |
2e528b49 | 783 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
784 | } |
785 | ||
786 | if (!zap->zap_ismicro) { | |
787 | err = fzap_lookup(zn, integer_size, num_integers, buf, | |
788 | realname, rn_len, ncp); | |
789 | } else { | |
790 | mze = mze_find(zn); | |
791 | if (mze == NULL) { | |
2e528b49 | 792 | err = SET_ERROR(ENOENT); |
34dc7c2f BB |
793 | } else { |
794 | if (num_integers < 1) { | |
2e528b49 | 795 | err = SET_ERROR(EOVERFLOW); |
34dc7c2f | 796 | } else if (integer_size != 8) { |
2e528b49 | 797 | err = SET_ERROR(EINVAL); |
34dc7c2f | 798 | } else { |
428870ff BB |
799 | *(uint64_t *)buf = |
800 | MZE_PHYS(zap, mze)->mze_value; | |
34dc7c2f | 801 | (void) strlcpy(realname, |
428870ff | 802 | MZE_PHYS(zap, mze)->mze_name, rn_len); |
34dc7c2f BB |
803 | if (ncp) { |
804 | *ncp = mzap_normalization_conflict(zap, | |
805 | zn, mze); | |
806 | } | |
807 | } | |
808 | } | |
809 | } | |
810 | zap_name_free(zn); | |
811 | zap_unlockdir(zap); | |
812 | return (err); | |
813 | } | |
814 | ||
428870ff BB |
815 | int |
816 | zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
817 | int key_numints) | |
818 | { | |
819 | zap_t *zap; | |
820 | int err; | |
821 | zap_name_t *zn; | |
822 | ||
823 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
824 | if (err) | |
825 | return (err); | |
826 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
827 | if (zn == NULL) { | |
828 | zap_unlockdir(zap); | |
2e528b49 | 829 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
830 | } |
831 | ||
832 | fzap_prefetch(zn); | |
833 | zap_name_free(zn); | |
834 | zap_unlockdir(zap); | |
835 | return (err); | |
836 | } | |
837 | ||
838 | int | |
839 | zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
840 | int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) | |
841 | { | |
842 | zap_t *zap; | |
843 | int err; | |
844 | zap_name_t *zn; | |
845 | ||
846 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
847 | if (err) | |
848 | return (err); | |
849 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
850 | if (zn == NULL) { | |
851 | zap_unlockdir(zap); | |
2e528b49 | 852 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
853 | } |
854 | ||
855 | err = fzap_lookup(zn, integer_size, num_integers, buf, | |
856 | NULL, 0, NULL); | |
857 | zap_name_free(zn); | |
858 | zap_unlockdir(zap); | |
859 | return (err); | |
860 | } | |
861 | ||
862 | int | |
863 | zap_contains(objset_t *os, uint64_t zapobj, const char *name) | |
864 | { | |
fa86b5db MA |
865 | int err = zap_lookup_norm(os, zapobj, name, 0, |
866 | 0, NULL, MT_EXACT, NULL, 0, NULL); | |
428870ff BB |
867 | if (err == EOVERFLOW || err == EINVAL) |
868 | err = 0; /* found, but skipped reading the value */ | |
869 | return (err); | |
870 | } | |
871 | ||
34dc7c2f BB |
872 | int |
873 | zap_length(objset_t *os, uint64_t zapobj, const char *name, | |
874 | uint64_t *integer_size, uint64_t *num_integers) | |
875 | { | |
876 | zap_t *zap; | |
877 | int err; | |
878 | mzap_ent_t *mze; | |
879 | zap_name_t *zn; | |
880 | ||
881 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
882 | if (err) | |
883 | return (err); | |
884 | zn = zap_name_alloc(zap, name, MT_EXACT); | |
885 | if (zn == NULL) { | |
886 | zap_unlockdir(zap); | |
2e528b49 | 887 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
888 | } |
889 | if (!zap->zap_ismicro) { | |
890 | err = fzap_length(zn, integer_size, num_integers); | |
891 | } else { | |
892 | mze = mze_find(zn); | |
893 | if (mze == NULL) { | |
2e528b49 | 894 | err = SET_ERROR(ENOENT); |
34dc7c2f BB |
895 | } else { |
896 | if (integer_size) | |
897 | *integer_size = 8; | |
898 | if (num_integers) | |
899 | *num_integers = 1; | |
900 | } | |
901 | } | |
902 | zap_name_free(zn); | |
903 | zap_unlockdir(zap); | |
904 | return (err); | |
905 | } | |
906 | ||
428870ff BB |
907 | int |
908 | zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
909 | int key_numints, uint64_t *integer_size, uint64_t *num_integers) | |
910 | { | |
911 | zap_t *zap; | |
912 | int err; | |
913 | zap_name_t *zn; | |
914 | ||
915 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
916 | if (err) | |
917 | return (err); | |
918 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
919 | if (zn == NULL) { | |
920 | zap_unlockdir(zap); | |
2e528b49 | 921 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
922 | } |
923 | err = fzap_length(zn, integer_size, num_integers); | |
924 | zap_name_free(zn); | |
925 | zap_unlockdir(zap); | |
926 | return (err); | |
927 | } | |
928 | ||
34dc7c2f BB |
929 | static void |
930 | mzap_addent(zap_name_t *zn, uint64_t value) | |
931 | { | |
932 | int i; | |
933 | zap_t *zap = zn->zn_zap; | |
934 | int start = zap->zap_m.zap_alloc_next; | |
935 | uint32_t cd; | |
936 | ||
34dc7c2f BB |
937 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); |
938 | ||
939 | #ifdef ZFS_DEBUG | |
940 | for (i = 0; i < zap->zap_m.zap_num_chunks; i++) { | |
d1d7e268 | 941 | ASSERTV(mzap_ent_phys_t *mze); |
d683ddbb | 942 | ASSERT(mze = &zap_m_phys(zap)->mz_chunk[i]); |
428870ff | 943 | ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); |
34dc7c2f BB |
944 | } |
945 | #endif | |
946 | ||
947 | cd = mze_find_unused_cd(zap, zn->zn_hash); | |
948 | /* given the limited size of the microzap, this can't happen */ | |
428870ff | 949 | ASSERT(cd < zap_maxcd(zap)); |
34dc7c2f BB |
950 | |
951 | again: | |
952 | for (i = start; i < zap->zap_m.zap_num_chunks; i++) { | |
d683ddbb | 953 | mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; |
34dc7c2f BB |
954 | if (mze->mze_name[0] == 0) { |
955 | mze->mze_value = value; | |
956 | mze->mze_cd = cd; | |
428870ff | 957 | (void) strcpy(mze->mze_name, zn->zn_key_orig); |
34dc7c2f BB |
958 | zap->zap_m.zap_num_entries++; |
959 | zap->zap_m.zap_alloc_next = i+1; | |
960 | if (zap->zap_m.zap_alloc_next == | |
961 | zap->zap_m.zap_num_chunks) | |
962 | zap->zap_m.zap_alloc_next = 0; | |
428870ff | 963 | mze_insert(zap, i, zn->zn_hash); |
34dc7c2f BB |
964 | return; |
965 | } | |
966 | } | |
967 | if (start != 0) { | |
968 | start = 0; | |
969 | goto again; | |
970 | } | |
989fd514 | 971 | cmn_err(CE_PANIC, "out of entries!"); |
34dc7c2f BB |
972 | } |
973 | ||
974 | int | |
428870ff | 975 | zap_add(objset_t *os, uint64_t zapobj, const char *key, |
34dc7c2f BB |
976 | int integer_size, uint64_t num_integers, |
977 | const void *val, dmu_tx_t *tx) | |
978 | { | |
979 | zap_t *zap; | |
980 | int err; | |
981 | mzap_ent_t *mze; | |
982 | const uint64_t *intval = val; | |
983 | zap_name_t *zn; | |
984 | ||
985 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); | |
986 | if (err) | |
987 | return (err); | |
428870ff | 988 | zn = zap_name_alloc(zap, key, MT_EXACT); |
34dc7c2f BB |
989 | if (zn == NULL) { |
990 | zap_unlockdir(zap); | |
2e528b49 | 991 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
992 | } |
993 | if (!zap->zap_ismicro) { | |
994 | err = fzap_add(zn, integer_size, num_integers, val, tx); | |
995 | zap = zn->zn_zap; /* fzap_add() may change zap */ | |
996 | } else if (integer_size != 8 || num_integers != 1 || | |
428870ff BB |
997 | strlen(key) >= MZAP_NAME_LEN) { |
998 | err = mzap_upgrade(&zn->zn_zap, tx, 0); | |
34dc7c2f BB |
999 | if (err == 0) |
1000 | err = fzap_add(zn, integer_size, num_integers, val, tx); | |
1001 | zap = zn->zn_zap; /* fzap_add() may change zap */ | |
1002 | } else { | |
1003 | mze = mze_find(zn); | |
1004 | if (mze != NULL) { | |
2e528b49 | 1005 | err = SET_ERROR(EEXIST); |
34dc7c2f BB |
1006 | } else { |
1007 | mzap_addent(zn, *intval); | |
1008 | } | |
1009 | } | |
1010 | ASSERT(zap == zn->zn_zap); | |
1011 | zap_name_free(zn); | |
1012 | if (zap != NULL) /* may be NULL if fzap_add() failed */ | |
1013 | zap_unlockdir(zap); | |
1014 | return (err); | |
1015 | } | |
1016 | ||
428870ff BB |
1017 | int |
1018 | zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1019 | int key_numints, int integer_size, uint64_t num_integers, | |
1020 | const void *val, dmu_tx_t *tx) | |
1021 | { | |
1022 | zap_t *zap; | |
1023 | int err; | |
1024 | zap_name_t *zn; | |
1025 | ||
1026 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); | |
1027 | if (err) | |
1028 | return (err); | |
1029 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
1030 | if (zn == NULL) { | |
1031 | zap_unlockdir(zap); | |
2e528b49 | 1032 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
1033 | } |
1034 | err = fzap_add(zn, integer_size, num_integers, val, tx); | |
1035 | zap = zn->zn_zap; /* fzap_add() may change zap */ | |
1036 | zap_name_free(zn); | |
1037 | if (zap != NULL) /* may be NULL if fzap_add() failed */ | |
1038 | zap_unlockdir(zap); | |
1039 | return (err); | |
1040 | } | |
1041 | ||
34dc7c2f BB |
1042 | int |
1043 | zap_update(objset_t *os, uint64_t zapobj, const char *name, | |
1044 | int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) | |
1045 | { | |
1046 | zap_t *zap; | |
1047 | mzap_ent_t *mze; | |
1048 | const uint64_t *intval = val; | |
1049 | zap_name_t *zn; | |
1050 | int err; | |
1051 | ||
428870ff | 1052 | #ifdef ZFS_DEBUG |
1fde1e37 BB |
1053 | uint64_t oldval; |
1054 | ||
428870ff BB |
1055 | /* |
1056 | * If there is an old value, it shouldn't change across the | |
1057 | * lockdir (eg, due to bprewrite's xlation). | |
1058 | */ | |
1059 | if (integer_size == 8 && num_integers == 1) | |
1060 | (void) zap_lookup(os, zapobj, name, 8, 1, &oldval); | |
1061 | #endif | |
1062 | ||
34dc7c2f BB |
1063 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); |
1064 | if (err) | |
1065 | return (err); | |
1066 | zn = zap_name_alloc(zap, name, MT_EXACT); | |
1067 | if (zn == NULL) { | |
1068 | zap_unlockdir(zap); | |
2e528b49 | 1069 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
1070 | } |
1071 | if (!zap->zap_ismicro) { | |
1072 | err = fzap_update(zn, integer_size, num_integers, val, tx); | |
1073 | zap = zn->zn_zap; /* fzap_update() may change zap */ | |
1074 | } else if (integer_size != 8 || num_integers != 1 || | |
1075 | strlen(name) >= MZAP_NAME_LEN) { | |
1076 | dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", | |
1077 | zapobj, integer_size, num_integers, name); | |
428870ff | 1078 | err = mzap_upgrade(&zn->zn_zap, tx, 0); |
34dc7c2f BB |
1079 | if (err == 0) |
1080 | err = fzap_update(zn, integer_size, num_integers, | |
1081 | val, tx); | |
1082 | zap = zn->zn_zap; /* fzap_update() may change zap */ | |
1083 | } else { | |
1084 | mze = mze_find(zn); | |
1085 | if (mze != NULL) { | |
428870ff BB |
1086 | ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval); |
1087 | MZE_PHYS(zap, mze)->mze_value = *intval; | |
34dc7c2f BB |
1088 | } else { |
1089 | mzap_addent(zn, *intval); | |
1090 | } | |
1091 | } | |
1092 | ASSERT(zap == zn->zn_zap); | |
1093 | zap_name_free(zn); | |
1094 | if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ | |
1095 | zap_unlockdir(zap); | |
1096 | return (err); | |
1097 | } | |
1098 | ||
428870ff BB |
1099 | int |
1100 | zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1101 | int key_numints, | |
1102 | int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) | |
1103 | { | |
1104 | zap_t *zap; | |
1105 | zap_name_t *zn; | |
1106 | int err; | |
1107 | ||
1108 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap); | |
1109 | if (err) | |
1110 | return (err); | |
1111 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
1112 | if (zn == NULL) { | |
1113 | zap_unlockdir(zap); | |
2e528b49 | 1114 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
1115 | } |
1116 | err = fzap_update(zn, integer_size, num_integers, val, tx); | |
1117 | zap = zn->zn_zap; /* fzap_update() may change zap */ | |
1118 | zap_name_free(zn); | |
1119 | if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ | |
1120 | zap_unlockdir(zap); | |
1121 | return (err); | |
1122 | } | |
1123 | ||
34dc7c2f BB |
1124 | int |
1125 | zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) | |
1126 | { | |
1127 | return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx)); | |
1128 | } | |
1129 | ||
1130 | int | |
1131 | zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, | |
1132 | matchtype_t mt, dmu_tx_t *tx) | |
1133 | { | |
1134 | zap_t *zap; | |
1135 | int err; | |
1136 | mzap_ent_t *mze; | |
1137 | zap_name_t *zn; | |
1138 | ||
1139 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); | |
1140 | if (err) | |
1141 | return (err); | |
1142 | zn = zap_name_alloc(zap, name, mt); | |
1143 | if (zn == NULL) { | |
1144 | zap_unlockdir(zap); | |
2e528b49 | 1145 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
1146 | } |
1147 | if (!zap->zap_ismicro) { | |
1148 | err = fzap_remove(zn, tx); | |
1149 | } else { | |
1150 | mze = mze_find(zn); | |
1151 | if (mze == NULL) { | |
2e528b49 | 1152 | err = SET_ERROR(ENOENT); |
34dc7c2f BB |
1153 | } else { |
1154 | zap->zap_m.zap_num_entries--; | |
d683ddbb | 1155 | bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], |
34dc7c2f BB |
1156 | sizeof (mzap_ent_phys_t)); |
1157 | mze_remove(zap, mze); | |
1158 | } | |
1159 | } | |
1160 | zap_name_free(zn); | |
1161 | zap_unlockdir(zap); | |
1162 | return (err); | |
1163 | } | |
1164 | ||
428870ff BB |
1165 | int |
1166 | zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1167 | int key_numints, dmu_tx_t *tx) | |
1168 | { | |
1169 | zap_t *zap; | |
1170 | int err; | |
1171 | zap_name_t *zn; | |
1172 | ||
1173 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap); | |
1174 | if (err) | |
1175 | return (err); | |
1176 | zn = zap_name_alloc_uint64(zap, key, key_numints); | |
1177 | if (zn == NULL) { | |
1178 | zap_unlockdir(zap); | |
2e528b49 | 1179 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
1180 | } |
1181 | err = fzap_remove(zn, tx); | |
1182 | zap_name_free(zn); | |
1183 | zap_unlockdir(zap); | |
1184 | return (err); | |
1185 | } | |
1186 | ||
34dc7c2f BB |
1187 | /* |
1188 | * Routines for iterating over the attributes. | |
1189 | */ | |
1190 | ||
34dc7c2f BB |
1191 | void |
1192 | zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, | |
1193 | uint64_t serialized) | |
1194 | { | |
1195 | zc->zc_objset = os; | |
1196 | zc->zc_zap = NULL; | |
1197 | zc->zc_leaf = NULL; | |
1198 | zc->zc_zapobj = zapobj; | |
428870ff BB |
1199 | zc->zc_serialized = serialized; |
1200 | zc->zc_hash = 0; | |
1201 | zc->zc_cd = 0; | |
34dc7c2f BB |
1202 | } |
1203 | ||
1204 | void | |
1205 | zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) | |
1206 | { | |
1207 | zap_cursor_init_serialized(zc, os, zapobj, 0); | |
1208 | } | |
1209 | ||
1210 | void | |
1211 | zap_cursor_fini(zap_cursor_t *zc) | |
1212 | { | |
1213 | if (zc->zc_zap) { | |
1214 | rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); | |
1215 | zap_unlockdir(zc->zc_zap); | |
1216 | zc->zc_zap = NULL; | |
1217 | } | |
1218 | if (zc->zc_leaf) { | |
1219 | rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); | |
1220 | zap_put_leaf(zc->zc_leaf); | |
1221 | zc->zc_leaf = NULL; | |
1222 | } | |
1223 | zc->zc_objset = NULL; | |
1224 | } | |
1225 | ||
1226 | uint64_t | |
1227 | zap_cursor_serialize(zap_cursor_t *zc) | |
1228 | { | |
1229 | if (zc->zc_hash == -1ULL) | |
1230 | return (-1ULL); | |
428870ff BB |
1231 | if (zc->zc_zap == NULL) |
1232 | return (zc->zc_serialized); | |
1233 | ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); | |
1234 | ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); | |
1235 | ||
1236 | /* | |
1237 | * We want to keep the high 32 bits of the cursor zero if we can, so | |
1238 | * that 32-bit programs can access this. So usually use a small | |
1239 | * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits | |
1240 | * of the cursor. | |
1241 | * | |
1242 | * [ collision differentiator | zap_hashbits()-bit hash value ] | |
1243 | */ | |
1244 | return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | | |
1245 | ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); | |
34dc7c2f BB |
1246 | } |
1247 | ||
1248 | int | |
1249 | zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) | |
1250 | { | |
1251 | int err; | |
1252 | avl_index_t idx; | |
1253 | mzap_ent_t mze_tofind; | |
1254 | mzap_ent_t *mze; | |
1255 | ||
1256 | if (zc->zc_hash == -1ULL) | |
2e528b49 | 1257 | return (SET_ERROR(ENOENT)); |
34dc7c2f BB |
1258 | |
1259 | if (zc->zc_zap == NULL) { | |
428870ff | 1260 | int hb; |
34dc7c2f BB |
1261 | err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, |
1262 | RW_READER, TRUE, FALSE, &zc->zc_zap); | |
1263 | if (err) | |
1264 | return (err); | |
428870ff BB |
1265 | |
1266 | /* | |
1267 | * To support zap_cursor_init_serialized, advance, retrieve, | |
1268 | * we must add to the existing zc_cd, which may already | |
1269 | * be 1 due to the zap_cursor_advance. | |
1270 | */ | |
1271 | ASSERT(zc->zc_hash == 0); | |
1272 | hb = zap_hashbits(zc->zc_zap); | |
1273 | zc->zc_hash = zc->zc_serialized << (64 - hb); | |
1274 | zc->zc_cd += zc->zc_serialized >> hb; | |
1275 | if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ | |
1276 | zc->zc_cd = 0; | |
34dc7c2f BB |
1277 | } else { |
1278 | rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); | |
1279 | } | |
1280 | if (!zc->zc_zap->zap_ismicro) { | |
1281 | err = fzap_cursor_retrieve(zc->zc_zap, zc, za); | |
1282 | } else { | |
34dc7c2f | 1283 | mze_tofind.mze_hash = zc->zc_hash; |
428870ff | 1284 | mze_tofind.mze_cd = zc->zc_cd; |
34dc7c2f BB |
1285 | |
1286 | mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); | |
1287 | if (mze == NULL) { | |
1288 | mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, | |
1289 | idx, AVL_AFTER); | |
1290 | } | |
1291 | if (mze) { | |
428870ff BB |
1292 | mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); |
1293 | ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); | |
34dc7c2f BB |
1294 | za->za_normalization_conflict = |
1295 | mzap_normalization_conflict(zc->zc_zap, NULL, mze); | |
1296 | za->za_integer_length = 8; | |
1297 | za->za_num_integers = 1; | |
428870ff BB |
1298 | za->za_first_integer = mzep->mze_value; |
1299 | (void) strcpy(za->za_name, mzep->mze_name); | |
34dc7c2f | 1300 | zc->zc_hash = mze->mze_hash; |
428870ff | 1301 | zc->zc_cd = mze->mze_cd; |
34dc7c2f BB |
1302 | err = 0; |
1303 | } else { | |
1304 | zc->zc_hash = -1ULL; | |
2e528b49 | 1305 | err = SET_ERROR(ENOENT); |
34dc7c2f BB |
1306 | } |
1307 | } | |
1308 | rw_exit(&zc->zc_zap->zap_rwlock); | |
1309 | return (err); | |
1310 | } | |
1311 | ||
1312 | void | |
1313 | zap_cursor_advance(zap_cursor_t *zc) | |
1314 | { | |
1315 | if (zc->zc_hash == -1ULL) | |
1316 | return; | |
1317 | zc->zc_cd++; | |
428870ff BB |
1318 | } |
1319 | ||
34dc7c2f BB |
1320 | int |
1321 | zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) | |
1322 | { | |
1323 | int err; | |
1324 | zap_t *zap; | |
1325 | ||
1326 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
1327 | if (err) | |
1328 | return (err); | |
1329 | ||
1330 | bzero(zs, sizeof (zap_stats_t)); | |
1331 | ||
1332 | if (zap->zap_ismicro) { | |
1333 | zs->zs_blocksize = zap->zap_dbuf->db_size; | |
1334 | zs->zs_num_entries = zap->zap_m.zap_num_entries; | |
1335 | zs->zs_num_blocks = 1; | |
1336 | } else { | |
1337 | fzap_get_stats(zap, zs); | |
1338 | } | |
1339 | zap_unlockdir(zap); | |
1340 | return (0); | |
1341 | } | |
9babb374 BB |
1342 | |
1343 | int | |
1344 | zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add, | |
45d1cae3 | 1345 | uint64_t *towrite, uint64_t *tooverwrite) |
9babb374 BB |
1346 | { |
1347 | zap_t *zap; | |
1348 | int err = 0; | |
1349 | ||
1350 | ||
1351 | /* | |
1352 | * Since, we don't have a name, we cannot figure out which blocks will | |
1353 | * be affected in this operation. So, account for the worst case : | |
1354 | * - 3 blocks overwritten: target leaf, ptrtbl block, header block | |
1355 | * - 4 new blocks written if adding: | |
1356 | * - 2 blocks for possibly split leaves, | |
1357 | * - 2 grown ptrtbl blocks | |
1358 | * | |
1359 | * This also accomodates the case where an add operation to a fairly | |
1360 | * large microzap results in a promotion to fatzap. | |
1361 | */ | |
1362 | if (name == NULL) { | |
1363 | *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; | |
1364 | return (err); | |
1365 | } | |
1366 | ||
1367 | /* | |
330d06f9 | 1368 | * We lock the zap with adding == FALSE. Because, if we pass |
9babb374 BB |
1369 | * the actual value of add, it could trigger a mzap_upgrade(). |
1370 | * At present we are just evaluating the possibility of this operation | |
1371 | * and hence we donot want to trigger an upgrade. | |
1372 | */ | |
1373 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap); | |
1374 | if (err) | |
1375 | return (err); | |
1376 | ||
1377 | if (!zap->zap_ismicro) { | |
1378 | zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT); | |
1379 | if (zn) { | |
1380 | err = fzap_count_write(zn, add, towrite, | |
1381 | tooverwrite); | |
1382 | zap_name_free(zn); | |
1383 | } else { | |
1384 | /* | |
1385 | * We treat this case as similar to (name == NULL) | |
1386 | */ | |
1387 | *towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE; | |
1388 | } | |
1389 | } else { | |
45d1cae3 BB |
1390 | /* |
1391 | * We are here if (name != NULL) and this is a micro-zap. | |
1392 | * We account for the header block depending on whether it | |
1393 | * is freeable. | |
1394 | * | |
1395 | * Incase of an add-operation it is hard to find out | |
1396 | * if this add will promote this microzap to fatzap. | |
1397 | * Hence, we consider the worst case and account for the | |
1398 | * blocks assuming this microzap would be promoted to a | |
1399 | * fatzap. | |
1400 | * | |
1401 | * 1 block overwritten : header block | |
1402 | * 4 new blocks written : 2 new split leaf, 2 grown | |
1403 | * ptrtbl blocks | |
1404 | */ | |
1405 | if (dmu_buf_freeable(zap->zap_dbuf)) | |
1406 | *tooverwrite += SPA_MAXBLOCKSIZE; | |
1407 | else | |
1408 | *towrite += SPA_MAXBLOCKSIZE; | |
1409 | ||
1410 | if (add) { | |
1411 | *towrite += 4 * SPA_MAXBLOCKSIZE; | |
9babb374 BB |
1412 | } |
1413 | } | |
1414 | ||
1415 | zap_unlockdir(zap); | |
1416 | return (err); | |
1417 | } | |
c28b2279 BB |
1418 | |
#if defined(_KERNEL) && defined(HAVE_SPL)
/*
 * Symbols made available to other kernel modules.  Only compiled for
 * kernel builds that have HAVE_SPL defined.
 */

/* Object creation and destruction. */
EXPORT_SYMBOL(zap_create);
EXPORT_SYMBOL(zap_create_norm);
EXPORT_SYMBOL(zap_create_flags);
EXPORT_SYMBOL(zap_create_claim);
EXPORT_SYMBOL(zap_create_claim_norm);
EXPORT_SYMBOL(zap_destroy);

/* Lookups and write accounting. */
EXPORT_SYMBOL(zap_lookup);
EXPORT_SYMBOL(zap_lookup_norm);
EXPORT_SYMBOL(zap_lookup_uint64);
EXPORT_SYMBOL(zap_contains);
EXPORT_SYMBOL(zap_prefetch_uint64);
EXPORT_SYMBOL(zap_count_write);

/* Entry modification and inspection. */
EXPORT_SYMBOL(zap_add);
EXPORT_SYMBOL(zap_add_uint64);
EXPORT_SYMBOL(zap_update);
EXPORT_SYMBOL(zap_update_uint64);
EXPORT_SYMBOL(zap_length);
EXPORT_SYMBOL(zap_length_uint64);
EXPORT_SYMBOL(zap_remove);
EXPORT_SYMBOL(zap_remove_norm);
EXPORT_SYMBOL(zap_remove_uint64);

/* Whole-object helpers. */
EXPORT_SYMBOL(zap_count);
EXPORT_SYMBOL(zap_value_search);
EXPORT_SYMBOL(zap_join);
EXPORT_SYMBOL(zap_join_increment);

/* Integer-keyed and counter helpers. */
EXPORT_SYMBOL(zap_add_int);
EXPORT_SYMBOL(zap_remove_int);
EXPORT_SYMBOL(zap_lookup_int);
EXPORT_SYMBOL(zap_increment_int);
EXPORT_SYMBOL(zap_add_int_key);
EXPORT_SYMBOL(zap_lookup_int_key);
EXPORT_SYMBOL(zap_increment);

/* Cursors. */
EXPORT_SYMBOL(zap_cursor_init);
EXPORT_SYMBOL(zap_cursor_fini);
EXPORT_SYMBOL(zap_cursor_retrieve);
EXPORT_SYMBOL(zap_cursor_advance);
EXPORT_SYMBOL(zap_cursor_serialize);
EXPORT_SYMBOL(zap_cursor_init_serialized);

/* Statistics. */
EXPORT_SYMBOL(zap_get_stats);
#endif