]>
Commit | Line | Data |
---|---|---|
34dc7c2f BB |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License (the "License"). | |
6 | * You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
1d3ba0bf | 9 | * or https://opensource.org/licenses/CDDL-1.0. |
34dc7c2f BB |
10 | * See the License for the specific language governing permissions |
11 | * and limitations under the License. | |
12 | * | |
13 | * When distributing Covered Code, include this CDDL HEADER in each | |
14 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
15 | * If applicable, add the following below this CDDL HEADER, with the | |
16 | * fields enclosed by brackets "[]" replaced with your own identifying | |
17 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
18 | * | |
19 | * CDDL HEADER END | |
20 | */ | |
9b7b9cd3 | 21 | |
34dc7c2f | 22 | /* |
428870ff | 23 | * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. |
d9b4bf06 | 24 | * Copyright (c) 2011, 2018 by Delphix. All rights reserved. |
0c66c32d | 25 | * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. |
9b7b9cd3 | 26 | * Copyright 2017 Nexenta Systems, Inc. |
34dc7c2f BB |
27 | */ |
28 | ||
428870ff | 29 | #include <sys/zio.h> |
34dc7c2f BB |
30 | #include <sys/spa.h> |
31 | #include <sys/dmu.h> | |
32 | #include <sys/zfs_context.h> | |
33 | #include <sys/zap.h> | |
34dc7c2f BB |
34 | #include <sys/zap_impl.h> |
35 | #include <sys/zap_leaf.h> | |
9dcdee78 | 36 | #include <sys/btree.h> |
428870ff | 37 | #include <sys/arc.h> |
f1512ee6 | 38 | #include <sys/dmu_objset.h> |
34dc7c2f BB |
39 | |
40 | #ifdef _KERNEL | |
41 | #include <sys/sunddi.h> | |
42 | #endif | |
43 | ||
a4b21ead MP |
44 | int zap_micro_max_size = MZAP_MAX_BLKSZ; |
45 | ||
8bea9815 | 46 | static int mzap_upgrade(zap_t **zapp, |
dd66857d | 47 | const void *tag, dmu_tx_t *tx, zap_flags_t flags); |
34dc7c2f | 48 | |
428870ff BB |
49 | uint64_t |
50 | zap_getflags(zap_t *zap) | |
51 | { | |
52 | if (zap->zap_ismicro) | |
53 | return (0); | |
d683ddbb | 54 | return (zap_f_phys(zap)->zap_flags); |
428870ff | 55 | } |
34dc7c2f | 56 | |
428870ff BB |
57 | int |
58 | zap_hashbits(zap_t *zap) | |
34dc7c2f | 59 | { |
428870ff BB |
60 | if (zap_getflags(zap) & ZAP_FLAG_HASH64) |
61 | return (48); | |
62 | else | |
63 | return (28); | |
64 | } | |
34dc7c2f | 65 | |
428870ff BB |
66 | uint32_t |
67 | zap_maxcd(zap_t *zap) | |
68 | { | |
69 | if (zap_getflags(zap) & ZAP_FLAG_HASH64) | |
70 | return ((1<<16)-1); | |
71 | else | |
72 | return (-1U); | |
73 | } | |
34dc7c2f | 74 | |
428870ff BB |
75 | static uint64_t |
76 | zap_hash(zap_name_t *zn) | |
77 | { | |
78 | zap_t *zap = zn->zn_zap; | |
79 | uint64_t h = 0; | |
34dc7c2f | 80 | |
428870ff BB |
81 | if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { |
82 | ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); | |
83 | h = *(uint64_t *)zn->zn_key_orig; | |
84 | } else { | |
85 | h = zap->zap_salt; | |
86 | ASSERT(h != 0); | |
87 | ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); | |
88 | ||
89 | if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { | |
428870ff BB |
90 | const uint64_t *wp = zn->zn_key_norm; |
91 | ||
92 | ASSERT(zn->zn_key_intlen == 8); | |
d2a12f9e MA |
93 | for (int i = 0; i < zn->zn_key_norm_numints; |
94 | wp++, i++) { | |
428870ff BB |
95 | uint64_t word = *wp; |
96 | ||
9dcdee78 | 97 | for (int j = 0; j < 8; j++) { |
428870ff BB |
98 | h = (h >> 8) ^ |
99 | zfs_crc64_table[(h ^ word) & 0xFF]; | |
100 | word >>= NBBY; | |
101 | } | |
102 | } | |
103 | } else { | |
428870ff BB |
104 | const uint8_t *cp = zn->zn_key_norm; |
105 | ||
106 | /* | |
107 | * We previously stored the terminating null on | |
108 | * disk, but didn't hash it, so we need to | |
109 | * continue to not hash it. (The | |
110 | * zn_key_*_numints includes the terminating | |
111 | * null for non-binary keys.) | |
112 | */ | |
d2a12f9e | 113 | int len = zn->zn_key_norm_numints - 1; |
428870ff BB |
114 | |
115 | ASSERT(zn->zn_key_intlen == 1); | |
d2a12f9e | 116 | for (int i = 0; i < len; cp++, i++) { |
428870ff BB |
117 | h = (h >> 8) ^ |
118 | zfs_crc64_table[(h ^ *cp) & 0xFF]; | |
119 | } | |
120 | } | |
121 | } | |
34dc7c2f | 122 | /* |
428870ff BB |
123 | * Don't use all 64 bits, since we need some in the cookie for |
124 | * the collision differentiator. We MUST use the high bits, | |
125 | * since those are the ones that we first pay attention to when | |
4e33ba4c | 126 | * choosing the bucket. |
34dc7c2f | 127 | */ |
428870ff | 128 | h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); |
34dc7c2f | 129 | |
428870ff | 130 | return (h); |
34dc7c2f BB |
131 | } |
132 | ||
133 | static int | |
9b7b9cd3 | 134 | zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags) |
34dc7c2f | 135 | { |
428870ff BB |
136 | ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); |
137 | ||
d2a12f9e MA |
138 | size_t inlen = strlen(name) + 1; |
139 | size_t outlen = ZAP_MAXNAMELEN; | |
34dc7c2f | 140 | |
d2a12f9e | 141 | int err = 0; |
34dc7c2f | 142 | (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, |
9b7b9cd3 GM |
143 | normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID, |
144 | U8_UNICODE_LATEST, &err); | |
34dc7c2f BB |
145 | |
146 | return (err); | |
147 | } | |
148 | ||
149 | boolean_t | |
150 | zap_match(zap_name_t *zn, const char *matchname) | |
151 | { | |
428870ff BB |
152 | ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); |
153 | ||
9b7b9cd3 | 154 | if (zn->zn_matchtype & MT_NORMALIZE) { |
34dc7c2f BB |
155 | char norm[ZAP_MAXNAMELEN]; |
156 | ||
9b7b9cd3 GM |
157 | if (zap_normalize(zn->zn_zap, matchname, norm, |
158 | zn->zn_normflags) != 0) | |
34dc7c2f BB |
159 | return (B_FALSE); |
160 | ||
428870ff | 161 | return (strcmp(zn->zn_key_norm, norm) == 0); |
34dc7c2f | 162 | } else { |
428870ff | 163 | return (strcmp(zn->zn_key_orig, matchname) == 0); |
34dc7c2f BB |
164 | } |
165 | } | |
166 | ||
9dcdee78 AM |
167 | static zap_name_t * |
168 | zap_name_alloc(zap_t *zap) | |
169 | { | |
170 | zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); | |
171 | zn->zn_zap = zap; | |
172 | return (zn); | |
173 | } | |
174 | ||
34dc7c2f BB |
175 | void |
176 | zap_name_free(zap_name_t *zn) | |
177 | { | |
178 | kmem_free(zn, sizeof (zap_name_t)); | |
179 | } | |
180 | ||
9dcdee78 AM |
181 | static int |
182 | zap_name_init_str(zap_name_t *zn, const char *key, matchtype_t mt) | |
34dc7c2f | 183 | { |
9dcdee78 | 184 | zap_t *zap = zn->zn_zap; |
34dc7c2f | 185 | |
428870ff BB |
186 | zn->zn_key_intlen = sizeof (*key); |
187 | zn->zn_key_orig = key; | |
188 | zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; | |
34dc7c2f | 189 | zn->zn_matchtype = mt; |
9b7b9cd3 GM |
190 | zn->zn_normflags = zap->zap_normflags; |
191 | ||
192 | /* | |
193 | * If we're dealing with a case sensitive lookup on a mixed or | |
194 | * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup | |
195 | * will fold case to all caps overriding the lookup request. | |
196 | */ | |
197 | if (mt & MT_MATCH_CASE) | |
198 | zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER; | |
199 | ||
34dc7c2f | 200 | if (zap->zap_normflags) { |
9b7b9cd3 GM |
201 | /* |
202 | * We *must* use zap_normflags because this normalization is | |
203 | * what the hash is computed from. | |
204 | */ | |
205 | if (zap_normalize(zap, key, zn->zn_normbuf, | |
9dcdee78 AM |
206 | zap->zap_normflags) != 0) |
207 | return (SET_ERROR(ENOTSUP)); | |
428870ff BB |
208 | zn->zn_key_norm = zn->zn_normbuf; |
209 | zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; | |
34dc7c2f | 210 | } else { |
9dcdee78 AM |
211 | if (mt != 0) |
212 | return (SET_ERROR(ENOTSUP)); | |
428870ff BB |
213 | zn->zn_key_norm = zn->zn_key_orig; |
214 | zn->zn_key_norm_numints = zn->zn_key_orig_numints; | |
34dc7c2f BB |
215 | } |
216 | ||
428870ff | 217 | zn->zn_hash = zap_hash(zn); |
9b7b9cd3 GM |
218 | |
219 | if (zap->zap_normflags != zn->zn_normflags) { | |
220 | /* | |
221 | * We *must* use zn_normflags because this normalization is | |
222 | * what the matching is based on. (Not the hash!) | |
223 | */ | |
224 | if (zap_normalize(zap, key, zn->zn_normbuf, | |
9dcdee78 AM |
225 | zn->zn_normflags) != 0) |
226 | return (SET_ERROR(ENOTSUP)); | |
9b7b9cd3 GM |
227 | zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; |
228 | } | |
229 | ||
9dcdee78 AM |
230 | return (0); |
231 | } | |
232 | ||
233 | zap_name_t * | |
234 | zap_name_alloc_str(zap_t *zap, const char *key, matchtype_t mt) | |
235 | { | |
236 | zap_name_t *zn = zap_name_alloc(zap); | |
237 | if (zap_name_init_str(zn, key, mt) != 0) { | |
238 | zap_name_free(zn); | |
239 | return (NULL); | |
240 | } | |
428870ff BB |
241 | return (zn); |
242 | } | |
243 | ||
65c7cc49 | 244 | static zap_name_t * |
428870ff BB |
245 | zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) |
246 | { | |
79c76d5b | 247 | zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); |
428870ff BB |
248 | |
249 | ASSERT(zap->zap_normflags == 0); | |
250 | zn->zn_zap = zap; | |
251 | zn->zn_key_intlen = sizeof (*key); | |
252 | zn->zn_key_orig = zn->zn_key_norm = key; | |
253 | zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; | |
9b7b9cd3 | 254 | zn->zn_matchtype = 0; |
428870ff BB |
255 | |
256 | zn->zn_hash = zap_hash(zn); | |
34dc7c2f BB |
257 | return (zn); |
258 | } | |
259 | ||
260 | static void | |
261 | mzap_byteswap(mzap_phys_t *buf, size_t size) | |
262 | { | |
34dc7c2f BB |
263 | buf->mz_block_type = BSWAP_64(buf->mz_block_type); |
264 | buf->mz_salt = BSWAP_64(buf->mz_salt); | |
265 | buf->mz_normflags = BSWAP_64(buf->mz_normflags); | |
d2a12f9e MA |
266 | int max = (size / MZAP_ENT_LEN) - 1; |
267 | for (int i = 0; i < max; i++) { | |
34dc7c2f BB |
268 | buf->mz_chunk[i].mze_value = |
269 | BSWAP_64(buf->mz_chunk[i].mze_value); | |
270 | buf->mz_chunk[i].mze_cd = | |
271 | BSWAP_32(buf->mz_chunk[i].mze_cd); | |
272 | } | |
273 | } | |
274 | ||
275 | void | |
276 | zap_byteswap(void *buf, size_t size) | |
277 | { | |
d2a12f9e | 278 | uint64_t block_type = *(uint64_t *)buf; |
34dc7c2f BB |
279 | |
280 | if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { | |
281 | /* ASSERT(magic == ZAP_LEAF_MAGIC); */ | |
282 | mzap_byteswap(buf, size); | |
283 | } else { | |
284 | fzap_byteswap(buf, size); | |
285 | } | |
286 | } | |
287 | ||
677c6f84 | 288 | __attribute__((always_inline)) inline |
34dc7c2f BB |
289 | static int |
290 | mze_compare(const void *arg1, const void *arg2) | |
291 | { | |
292 | const mzap_ent_t *mze1 = arg1; | |
293 | const mzap_ent_t *mze2 = arg2; | |
294 | ||
9dcdee78 AM |
295 | return (TREE_CMP((uint64_t)(mze1->mze_hash) << 32 | mze1->mze_cd, |
296 | (uint64_t)(mze2->mze_hash) << 32 | mze2->mze_cd)); | |
34dc7c2f BB |
297 | } |
298 | ||
677c6f84 RY |
299 | ZFS_BTREE_FIND_IN_BUF_FUNC(mze_find_in_buf, mzap_ent_t, |
300 | mze_compare) | |
301 | ||
34dc7c2f | 302 | static void |
9dcdee78 | 303 | mze_insert(zap_t *zap, uint16_t chunkid, uint64_t hash) |
34dc7c2f | 304 | { |
9dcdee78 AM |
305 | mzap_ent_t mze; |
306 | ||
34dc7c2f BB |
307 | ASSERT(zap->zap_ismicro); |
308 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); | |
34dc7c2f | 309 | |
9dcdee78 AM |
310 | mze.mze_chunkid = chunkid; |
311 | ASSERT0(hash & 0xffffffff); | |
312 | mze.mze_hash = hash >> 32; | |
313 | ASSERT3U(MZE_PHYS(zap, &mze)->mze_cd, <=, 0xffff); | |
314 | mze.mze_cd = (uint16_t)MZE_PHYS(zap, &mze)->mze_cd; | |
315 | ASSERT(MZE_PHYS(zap, &mze)->mze_name[0] != 0); | |
316 | zfs_btree_add(&zap->zap_m.zap_tree, &mze); | |
34dc7c2f BB |
317 | } |
318 | ||
319 | static mzap_ent_t * | |
9dcdee78 | 320 | mze_find(zap_name_t *zn, zfs_btree_index_t *idx) |
34dc7c2f BB |
321 | { |
322 | mzap_ent_t mze_tofind; | |
323 | mzap_ent_t *mze; | |
9dcdee78 | 324 | zfs_btree_t *tree = &zn->zn_zap->zap_m.zap_tree; |
34dc7c2f BB |
325 | |
326 | ASSERT(zn->zn_zap->zap_ismicro); | |
327 | ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); | |
328 | ||
9dcdee78 AM |
329 | ASSERT0(zn->zn_hash & 0xffffffff); |
330 | mze_tofind.mze_hash = zn->zn_hash >> 32; | |
428870ff | 331 | mze_tofind.mze_cd = 0; |
34dc7c2f | 332 | |
9dcdee78 | 333 | mze = zfs_btree_find(tree, &mze_tofind, idx); |
34dc7c2f | 334 | if (mze == NULL) |
9dcdee78 AM |
335 | mze = zfs_btree_next(tree, idx, idx); |
336 | for (; mze && mze->mze_hash == mze_tofind.mze_hash; | |
337 | mze = zfs_btree_next(tree, idx, idx)) { | |
428870ff BB |
338 | ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); |
339 | if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) | |
34dc7c2f BB |
340 | return (mze); |
341 | } | |
9b7b9cd3 | 342 | |
34dc7c2f BB |
343 | return (NULL); |
344 | } | |
345 | ||
346 | static uint32_t | |
347 | mze_find_unused_cd(zap_t *zap, uint64_t hash) | |
348 | { | |
349 | mzap_ent_t mze_tofind; | |
9dcdee78 AM |
350 | zfs_btree_index_t idx; |
351 | zfs_btree_t *tree = &zap->zap_m.zap_tree; | |
34dc7c2f BB |
352 | |
353 | ASSERT(zap->zap_ismicro); | |
354 | ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); | |
355 | ||
9dcdee78 AM |
356 | ASSERT0(hash & 0xffffffff); |
357 | hash >>= 32; | |
34dc7c2f | 358 | mze_tofind.mze_hash = hash; |
428870ff | 359 | mze_tofind.mze_cd = 0; |
34dc7c2f | 360 | |
d2a12f9e | 361 | uint32_t cd = 0; |
9dcdee78 AM |
362 | for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx); |
363 | mze && mze->mze_hash == hash; | |
364 | mze = zfs_btree_next(tree, &idx, &idx)) { | |
428870ff | 365 | if (mze->mze_cd != cd) |
34dc7c2f BB |
366 | break; |
367 | cd++; | |
368 | } | |
369 | ||
370 | return (cd); | |
371 | } | |
372 | ||
599b8648 CC |
/*
 * Upper bound on the fatzap leaf chunks a single microzap entry needs:
 * one chunk (presumably the leaf entry itself), the array chunks for a
 * name of MZAP_NAME_LEN bytes, and the array chunks for the 64-bit value.
 */
#define	MZAP_ENT_CHUNKS	\
	(1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
	ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
379 | ||
380 | /* | |
381 | * Check if the current entry keeps the colliding entries under the fatzap leaf | |
382 | * size. | |
383 | */ | |
384 | static boolean_t | |
385 | mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) | |
386 | { | |
387 | zap_t *zap = zn->zn_zap; | |
388 | mzap_ent_t mze_tofind; | |
9dcdee78 AM |
389 | zfs_btree_index_t idx; |
390 | zfs_btree_t *tree = &zap->zap_m.zap_tree; | |
599b8648 CC |
391 | uint32_t mzap_ents = 0; |
392 | ||
9dcdee78 AM |
393 | ASSERT0(hash & 0xffffffff); |
394 | hash >>= 32; | |
599b8648 CC |
395 | mze_tofind.mze_hash = hash; |
396 | mze_tofind.mze_cd = 0; | |
397 | ||
9dcdee78 AM |
398 | for (mzap_ent_t *mze = zfs_btree_find(tree, &mze_tofind, &idx); |
399 | mze && mze->mze_hash == hash; | |
400 | mze = zfs_btree_next(tree, &idx, &idx)) { | |
599b8648 CC |
401 | mzap_ents++; |
402 | } | |
403 | ||
404 | /* Include the new entry being added */ | |
405 | mzap_ents++; | |
406 | ||
407 | return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS)); | |
408 | } | |
409 | ||
34dc7c2f BB |
410 | static void |
411 | mze_destroy(zap_t *zap) | |
412 | { | |
9dcdee78 AM |
413 | zfs_btree_clear(&zap->zap_m.zap_tree); |
414 | zfs_btree_destroy(&zap->zap_m.zap_tree); | |
34dc7c2f BB |
415 | } |
416 | ||
417 | static zap_t * | |
418 | mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) | |
419 | { | |
420 | zap_t *winner; | |
32c8c946 CC |
421 | uint64_t *zap_hdr = (uint64_t *)db->db_data; |
422 | uint64_t zap_block_type = zap_hdr[0]; | |
423 | uint64_t zap_magic = zap_hdr[1]; | |
34dc7c2f BB |
424 | |
425 | ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); | |
426 | ||
d2a12f9e | 427 | zap_t *zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); |
ef5319df | 428 | rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL); |
34dc7c2f BB |
429 | rw_enter(&zap->zap_rwlock, RW_WRITER); |
430 | zap->zap_objset = os; | |
431 | zap->zap_object = obj; | |
432 | zap->zap_dbuf = db; | |
433 | ||
32c8c946 | 434 | if (zap_block_type != ZBT_MICRO) { |
c17486b2 GN |
435 | mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT, |
436 | 0); | |
9bd274dd | 437 | zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1; |
32c8c946 CC |
438 | if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) { |
439 | winner = NULL; /* No actual winner here... */ | |
440 | goto handle_winner; | |
441 | } | |
34dc7c2f BB |
442 | } else { |
443 | zap->zap_ismicro = TRUE; | |
444 | } | |
445 | ||
446 | /* | |
447 | * Make sure that zap_ismicro is set before we let others see | |
448 | * it, because zap_lockdir() checks zap_ismicro without the lock | |
449 | * held. | |
450 | */ | |
39efbde7 | 451 | dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf); |
0c66c32d | 452 | winner = dmu_buf_set_user(db, &zap->zap_dbu); |
34dc7c2f | 453 | |
32c8c946 CC |
454 | if (winner != NULL) |
455 | goto handle_winner; | |
34dc7c2f BB |
456 | |
457 | if (zap->zap_ismicro) { | |
d683ddbb JG |
458 | zap->zap_salt = zap_m_phys(zap)->mz_salt; |
459 | zap->zap_normflags = zap_m_phys(zap)->mz_normflags; | |
34dc7c2f | 460 | zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; |
34dc7c2f | 461 | |
9dcdee78 AM |
462 | /* |
463 | * Reduce B-tree leaf from 4KB to 512 bytes to reduce memmove() | |
464 | * overhead on massive inserts below. It still allows to store | |
465 | * 62 entries before we have to add 2KB B-tree core node. | |
466 | */ | |
467 | zfs_btree_create_custom(&zap->zap_m.zap_tree, mze_compare, | |
677c6f84 | 468 | mze_find_in_buf, sizeof (mzap_ent_t), 512); |
9dcdee78 AM |
469 | |
470 | zap_name_t *zn = zap_name_alloc(zap); | |
471 | for (uint16_t i = 0; i < zap->zap_m.zap_num_chunks; i++) { | |
34dc7c2f | 472 | mzap_ent_phys_t *mze = |
d683ddbb | 473 | &zap_m_phys(zap)->mz_chunk[i]; |
34dc7c2f | 474 | if (mze->mze_name[0]) { |
34dc7c2f | 475 | zap->zap_m.zap_num_entries++; |
9dcdee78 | 476 | zap_name_init_str(zn, mze->mze_name, 0); |
428870ff | 477 | mze_insert(zap, i, zn->zn_hash); |
34dc7c2f BB |
478 | } |
479 | } | |
9dcdee78 | 480 | zap_name_free(zn); |
34dc7c2f | 481 | } else { |
d683ddbb JG |
482 | zap->zap_salt = zap_f_phys(zap)->zap_salt; |
483 | zap->zap_normflags = zap_f_phys(zap)->zap_normflags; | |
34dc7c2f BB |
484 | |
485 | ASSERT3U(sizeof (struct zap_leaf_header), ==, | |
486 | 2*ZAP_LEAF_CHUNKSIZE); | |
487 | ||
488 | /* | |
489 | * The embedded pointer table should not overlap the | |
490 | * other members. | |
491 | */ | |
492 | ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, | |
d683ddbb | 493 | &zap_f_phys(zap)->zap_salt); |
34dc7c2f BB |
494 | |
495 | /* | |
496 | * The embedded pointer table should end at the end of | |
497 | * the block | |
498 | */ | |
499 | ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, | |
500 | 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - | |
d683ddbb | 501 | (uintptr_t)zap_f_phys(zap), ==, |
34dc7c2f BB |
502 | zap->zap_dbuf->db_size); |
503 | } | |
504 | rw_exit(&zap->zap_rwlock); | |
505 | return (zap); | |
32c8c946 CC |
506 | |
507 | handle_winner: | |
508 | rw_exit(&zap->zap_rwlock); | |
509 | rw_destroy(&zap->zap_rwlock); | |
510 | if (!zap->zap_ismicro) | |
511 | mutex_destroy(&zap->zap_f.zap_num_entries_mtx); | |
512 | kmem_free(zap, sizeof (zap_t)); | |
513 | return (winner); | |
34dc7c2f BB |
514 | } |
515 | ||
1a5b96b8 MA |
516 | /* |
517 | * This routine "consumes" the caller's hold on the dbuf, which must | |
518 | * have the specified tag. | |
519 | */ | |
8bea9815 | 520 | static int |
dd66857d | 521 | zap_lockdir_impl(dmu_buf_t *db, const void *tag, dmu_tx_t *tx, |
34dc7c2f BB |
522 | krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) |
523 | { | |
d2a12f9e | 524 | ASSERT0(db->db_offset); |
8bea9815 MA |
525 | objset_t *os = dmu_buf_get_objset(db); |
526 | uint64_t obj = db->db_object; | |
d2a12f9e | 527 | dmu_object_info_t doi; |
34dc7c2f | 528 | |
8bea9815 | 529 | *zapp = NULL; |
34dc7c2f | 530 | |
ceb49b0a BB |
531 | dmu_object_info_from_db(db, &doi); |
532 | if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP) | |
533 | return (SET_ERROR(EINVAL)); | |
34dc7c2f | 534 | |
d2a12f9e | 535 | zap_t *zap = dmu_buf_get_user(db); |
32c8c946 | 536 | if (zap == NULL) { |
34dc7c2f | 537 | zap = mzap_open(os, obj, db); |
32c8c946 CC |
538 | if (zap == NULL) { |
539 | /* | |
540 | * mzap_open() didn't like what it saw on-disk. | |
541 | * Check for corruption! | |
542 | */ | |
32c8c946 CC |
543 | return (SET_ERROR(EIO)); |
544 | } | |
545 | } | |
34dc7c2f BB |
546 | |
547 | /* | |
548 | * We're checking zap_ismicro without the lock held, in order to | |
549 | * tell what type of lock we want. Once we have some sort of | |
550 | * lock, see if it really is the right type. In practice this | |
551 | * can only be different if it was upgraded from micro to fat, | |
552 | * and micro wanted WRITER but fat only needs READER. | |
553 | */ | |
d2a12f9e | 554 | krw_t lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; |
34dc7c2f BB |
555 | rw_enter(&zap->zap_rwlock, lt); |
556 | if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { | |
557 | /* it was upgraded, now we only need reader */ | |
558 | ASSERT(lt == RW_WRITER); | |
559 | ASSERT(RW_READER == | |
6d79eabf | 560 | ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)); |
34dc7c2f BB |
561 | rw_downgrade(&zap->zap_rwlock); |
562 | lt = RW_READER; | |
563 | } | |
564 | ||
565 | zap->zap_objset = os; | |
566 | ||
567 | if (lt == RW_WRITER) | |
568 | dmu_buf_will_dirty(db, tx); | |
569 | ||
570 | ASSERT3P(zap->zap_dbuf, ==, db); | |
571 | ||
572 | ASSERT(!zap->zap_ismicro || | |
573 | zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); | |
574 | if (zap->zap_ismicro && tx && adding && | |
575 | zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { | |
576 | uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; | |
a4b21ead | 577 | if (newsz > zap_micro_max_size) { |
34dc7c2f | 578 | dprintf("upgrading obj %llu: num_entries=%u\n", |
8e739b2c | 579 | (u_longlong_t)obj, zap->zap_m.zap_num_entries); |
34dc7c2f | 580 | *zapp = zap; |
1c27024e | 581 | int err = mzap_upgrade(zapp, tag, tx, 0); |
8bea9815 MA |
582 | if (err != 0) |
583 | rw_exit(&zap->zap_rwlock); | |
584 | return (err); | |
34dc7c2f | 585 | } |
8bea9815 | 586 | VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx)); |
34dc7c2f BB |
587 | zap->zap_m.zap_num_chunks = |
588 | db->db_size / MZAP_ENT_LEN - 1; | |
589 | } | |
590 | ||
591 | *zapp = zap; | |
592 | return (0); | |
593 | } | |
594 | ||
2bce8049 MA |
595 | static int |
596 | zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx, | |
dd66857d AZ |
597 | krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag, |
598 | zap_t **zapp) | |
2bce8049 MA |
599 | { |
600 | dmu_buf_t *db; | |
2bce8049 | 601 | |
d2a12f9e | 602 | int err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH); |
2bce8049 MA |
603 | if (err != 0) { |
604 | return (err); | |
605 | } | |
1a5b96b8 MA |
606 | #ifdef ZFS_DEBUG |
607 | { | |
608 | dmu_object_info_t doi; | |
609 | dmu_object_info_from_db(db, &doi); | |
610 | ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); | |
611 | } | |
612 | #endif | |
613 | ||
2bce8049 MA |
614 | err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); |
615 | if (err != 0) { | |
616 | dmu_buf_rele(db, tag); | |
617 | } | |
618 | return (err); | |
619 | } | |
620 | ||
8bea9815 MA |
621 | int |
622 | zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, | |
dd66857d AZ |
623 | krw_t lti, boolean_t fatreader, boolean_t adding, const void *tag, |
624 | zap_t **zapp) | |
8bea9815 MA |
625 | { |
626 | dmu_buf_t *db; | |
8bea9815 | 627 | |
d2a12f9e | 628 | int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH); |
8bea9815 MA |
629 | if (err != 0) |
630 | return (err); | |
1a5b96b8 MA |
631 | #ifdef ZFS_DEBUG |
632 | { | |
633 | dmu_object_info_t doi; | |
634 | dmu_object_info_from_db(db, &doi); | |
635 | ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); | |
636 | } | |
637 | #endif | |
8bea9815 MA |
638 | err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); |
639 | if (err != 0) | |
640 | dmu_buf_rele(db, tag); | |
641 | return (err); | |
642 | } | |
643 | ||
34dc7c2f | 644 | void |
dd66857d | 645 | zap_unlockdir(zap_t *zap, const void *tag) |
34dc7c2f BB |
646 | { |
647 | rw_exit(&zap->zap_rwlock); | |
8bea9815 | 648 | dmu_buf_rele(zap->zap_dbuf, tag); |
34dc7c2f BB |
649 | } |
650 | ||
651 | static int | |
dd66857d | 652 | mzap_upgrade(zap_t **zapp, const void *tag, dmu_tx_t *tx, zap_flags_t flags) |
34dc7c2f | 653 | { |
428870ff | 654 | int err = 0; |
34dc7c2f BB |
655 | zap_t *zap = *zapp; |
656 | ||
657 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); | |
658 | ||
d2a12f9e MA |
659 | int sz = zap->zap_dbuf->db_size; |
660 | mzap_phys_t *mzp = vmem_alloc(sz, KM_SLEEP); | |
861166b0 | 661 | memcpy(mzp, zap->zap_dbuf->db_data, sz); |
d2a12f9e | 662 | int nchunks = zap->zap_m.zap_num_chunks; |
34dc7c2f | 663 | |
428870ff BB |
664 | if (!flags) { |
665 | err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, | |
666 | 1ULL << fzap_default_block_shift, 0, tx); | |
d2a12f9e | 667 | if (err != 0) { |
a3fd9d9e | 668 | vmem_free(mzp, sz); |
428870ff BB |
669 | return (err); |
670 | } | |
34dc7c2f BB |
671 | } |
672 | ||
673 | dprintf("upgrading obj=%llu with %u chunks\n", | |
8e739b2c | 674 | (u_longlong_t)zap->zap_object, nchunks); |
9dcdee78 | 675 | /* XXX destroy the tree later, so we can use the stored hash value */ |
34dc7c2f BB |
676 | mze_destroy(zap); |
677 | ||
428870ff | 678 | fzap_upgrade(zap, tx, flags); |
34dc7c2f | 679 | |
9dcdee78 | 680 | zap_name_t *zn = zap_name_alloc(zap); |
d2a12f9e | 681 | for (int i = 0; i < nchunks; i++) { |
34dc7c2f | 682 | mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; |
34dc7c2f BB |
683 | if (mze->mze_name[0] == 0) |
684 | continue; | |
685 | dprintf("adding %s=%llu\n", | |
8e739b2c | 686 | mze->mze_name, (u_longlong_t)mze->mze_value); |
9dcdee78 | 687 | zap_name_init_str(zn, mze->mze_name, 0); |
599b8648 CC |
688 | /* If we fail here, we would end up losing entries */ |
689 | VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, | |
690 | tag, tx)); | |
34dc7c2f | 691 | zap = zn->zn_zap; /* fzap_add_cd() may change zap */ |
34dc7c2f | 692 | } |
9dcdee78 | 693 | zap_name_free(zn); |
a3fd9d9e | 694 | vmem_free(mzp, sz); |
34dc7c2f | 695 | *zapp = zap; |
599b8648 | 696 | return (0); |
34dc7c2f BB |
697 | } |
698 | ||
9b7b9cd3 GM |
699 | /* |
700 | * The "normflags" determine the behavior of the matchtype_t which is | |
701 | * passed to zap_lookup_norm(). Names which have the same normalized | |
702 | * version will be stored with the same hash value, and therefore we can | |
703 | * perform normalization-insensitive lookups. We can be Unicode form- | |
704 | * insensitive and/or case-insensitive. The following flags are valid for | |
705 | * "normflags": | |
706 | * | |
707 | * U8_TEXTPREP_NFC | |
708 | * U8_TEXTPREP_NFD | |
709 | * U8_TEXTPREP_NFKC | |
710 | * U8_TEXTPREP_NFKD | |
711 | * U8_TEXTPREP_TOUPPER | |
712 | * | |
713 | * The *_NF* (Normalization Form) flags are mutually exclusive; at most one | |
714 | * of them may be supplied. | |
715 | */ | |
fa86b5db | 716 | void |
6955b401 | 717 | mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx) |
34dc7c2f BB |
718 | { |
719 | dmu_buf_t *db; | |
34dc7c2f | 720 | |
6955b401 | 721 | VERIFY0(dmu_buf_hold_by_dnode(dn, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); |
34dc7c2f | 722 | |
34dc7c2f | 723 | dmu_buf_will_dirty(db, tx); |
d2a12f9e | 724 | mzap_phys_t *zp = db->db_data; |
34dc7c2f | 725 | zp->mz_block_type = ZBT_MICRO; |
6955b401 BB |
726 | zp->mz_salt = |
727 | ((uintptr_t)db ^ (uintptr_t)tx ^ (dn->dn_object << 1)) | 1ULL; | |
34dc7c2f | 728 | zp->mz_normflags = normflags; |
428870ff BB |
729 | |
730 | if (flags != 0) { | |
731 | zap_t *zap; | |
732 | /* Only fat zap supports flags; upgrade immediately. */ | |
1a5b96b8 MA |
733 | VERIFY0(zap_lockdir_impl(db, FTAG, tx, RW_WRITER, |
734 | B_FALSE, B_FALSE, &zap)); | |
d2a12f9e | 735 | VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags)); |
8bea9815 | 736 | zap_unlockdir(zap, FTAG); |
1a5b96b8 MA |
737 | } else { |
738 | dmu_buf_rele(db, FTAG); | |
428870ff | 739 | } |
34dc7c2f BB |
740 | } |
741 | ||
6955b401 BB |
742 | static uint64_t |
743 | zap_create_impl(objset_t *os, int normflags, zap_flags_t flags, | |
744 | dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, | |
745 | dmu_object_type_t bonustype, int bonuslen, int dnodesize, | |
dd66857d | 746 | dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx) |
6955b401 BB |
747 | { |
748 | uint64_t obj; | |
749 | ||
750 | ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); | |
751 | ||
752 | if (allocated_dnode == NULL) { | |
753 | dnode_t *dn; | |
754 | obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift, | |
755 | indirect_blockshift, bonustype, bonuslen, dnodesize, | |
756 | &dn, FTAG, tx); | |
757 | mzap_create_impl(dn, normflags, flags, tx); | |
758 | dnode_rele(dn, FTAG); | |
759 | } else { | |
760 | obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift, | |
761 | indirect_blockshift, bonustype, bonuslen, dnodesize, | |
762 | allocated_dnode, tag, tx); | |
763 | mzap_create_impl(*allocated_dnode, normflags, flags, tx); | |
764 | } | |
765 | ||
766 | return (obj); | |
767 | } | |
768 | ||
34dc7c2f BB |
769 | int |
770 | zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, | |
771 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
772 | { | |
50c957f7 NB |
773 | return (zap_create_claim_dnsize(os, obj, ot, bonustype, bonuslen, |
774 | 0, tx)); | |
775 | } | |
776 | ||
777 | int | |
778 | zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot, | |
779 | dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) | |
780 | { | |
781 | return (zap_create_claim_norm_dnsize(os, obj, | |
782 | 0, ot, bonustype, bonuslen, dnodesize, tx)); | |
34dc7c2f BB |
783 | } |
784 | ||
785 | int | |
786 | zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, | |
787 | dmu_object_type_t ot, | |
788 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
50c957f7 NB |
789 | { |
790 | return (zap_create_claim_norm_dnsize(os, obj, normflags, ot, bonustype, | |
791 | bonuslen, 0, tx)); | |
792 | } | |
793 | ||
794 | int | |
795 | zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags, | |
796 | dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, | |
797 | int dnodesize, dmu_tx_t *tx) | |
34dc7c2f | 798 | { |
6955b401 BB |
799 | dnode_t *dn; |
800 | int error; | |
801 | ||
1a5b96b8 | 802 | ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); |
6955b401 | 803 | error = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen, |
50c957f7 | 804 | dnodesize, tx); |
6955b401 BB |
805 | if (error != 0) |
806 | return (error); | |
807 | ||
808 | error = dnode_hold(os, obj, FTAG, &dn); | |
809 | if (error != 0) | |
810 | return (error); | |
811 | ||
812 | mzap_create_impl(dn, normflags, 0, tx); | |
813 | ||
814 | dnode_rele(dn, FTAG); | |
815 | ||
34dc7c2f BB |
816 | return (0); |
817 | } | |
818 | ||
819 | uint64_t | |
820 | zap_create(objset_t *os, dmu_object_type_t ot, | |
821 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
822 | { | |
823 | return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); | |
824 | } | |
825 | ||
50c957f7 NB |
826 | uint64_t |
827 | zap_create_dnsize(objset_t *os, dmu_object_type_t ot, | |
828 | dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) | |
829 | { | |
830 | return (zap_create_norm_dnsize(os, 0, ot, bonustype, bonuslen, | |
831 | dnodesize, tx)); | |
832 | } | |
833 | ||
34dc7c2f BB |
834 | uint64_t |
835 | zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, | |
836 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
837 | { | |
50c957f7 NB |
838 | return (zap_create_norm_dnsize(os, normflags, ot, bonustype, bonuslen, |
839 | 0, tx)); | |
840 | } | |
841 | ||
842 | uint64_t | |
843 | zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot, | |
844 | dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) | |
845 | { | |
6955b401 BB |
846 | return (zap_create_impl(os, normflags, 0, ot, 0, 0, |
847 | bonustype, bonuslen, dnodesize, NULL, NULL, tx)); | |
428870ff BB |
848 | } |
849 | ||
850 | uint64_t | |
851 | zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, | |
852 | dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, | |
853 | dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) | |
854 | { | |
50c957f7 NB |
855 | return (zap_create_flags_dnsize(os, normflags, flags, ot, |
856 | leaf_blockshift, indirect_blockshift, bonustype, bonuslen, 0, tx)); | |
857 | } | |
858 | ||
859 | uint64_t | |
860 | zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags, | |
861 | dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, | |
862 | dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) | |
863 | { | |
6955b401 BB |
864 | return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift, |
865 | indirect_blockshift, bonustype, bonuslen, dnodesize, NULL, NULL, | |
866 | tx)); | |
867 | } | |
428870ff | 868 | |
6955b401 BB |
869 | /* |
870 | * Create a zap object and return a pointer to the newly allocated dnode via | |
871 | * the allocated_dnode argument. The returned dnode will be held and the | |
872 | * caller is responsible for releasing the hold by calling dnode_rele(). | |
873 | */ | |
874 | uint64_t | |
875 | zap_create_hold(objset_t *os, int normflags, zap_flags_t flags, | |
876 | dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, | |
877 | dmu_object_type_t bonustype, int bonuslen, int dnodesize, | |
dd66857d | 878 | dnode_t **allocated_dnode, const void *tag, dmu_tx_t *tx) |
6955b401 BB |
879 | { |
880 | return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift, | |
881 | indirect_blockshift, bonustype, bonuslen, dnodesize, | |
882 | allocated_dnode, tag, tx)); | |
34dc7c2f BB |
883 | } |
884 | ||
885 | int | |
886 | zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) | |
887 | { | |
888 | /* | |
889 | * dmu_object_free will free the object number and free the | |
890 | * data. Freeing the data will cause our pageout function to be | |
891 | * called, which will destroy our data (zap_leaf_t's and zap_t). | |
892 | */ | |
893 | ||
894 | return (dmu_object_free(os, zapobj, tx)); | |
895 | } | |
896 | ||
34dc7c2f | 897 | void |
39efbde7 | 898 | zap_evict_sync(void *dbu) |
34dc7c2f | 899 | { |
0c66c32d | 900 | zap_t *zap = dbu; |
34dc7c2f BB |
901 | |
902 | rw_destroy(&zap->zap_rwlock); | |
903 | ||
904 | if (zap->zap_ismicro) | |
905 | mze_destroy(zap); | |
906 | else | |
907 | mutex_destroy(&zap->zap_f.zap_num_entries_mtx); | |
908 | ||
909 | kmem_free(zap, sizeof (zap_t)); | |
910 | } | |
911 | ||
912 | int | |
913 | zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) | |
914 | { | |
915 | zap_t *zap; | |
34dc7c2f | 916 | |
d2a12f9e MA |
917 | int err = |
918 | zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); | |
919 | if (err != 0) | |
34dc7c2f BB |
920 | return (err); |
921 | if (!zap->zap_ismicro) { | |
922 | err = fzap_count(zap, count); | |
923 | } else { | |
924 | *count = zap->zap_m.zap_num_entries; | |
925 | } | |
8bea9815 | 926 | zap_unlockdir(zap, FTAG); |
34dc7c2f BB |
927 | return (err); |
928 | } | |
929 | ||
930 | /* | |
931 | * zn may be NULL; if not specified, it will be computed if needed. | |
932 | * See also the comment above zap_entry_normalization_conflict(). | |
933 | */ | |
934 | static boolean_t | |
9dcdee78 AM |
935 | mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze, |
936 | zfs_btree_index_t *idx) | |
34dc7c2f | 937 | { |
34dc7c2f | 938 | boolean_t allocdzn = B_FALSE; |
9dcdee78 AM |
939 | mzap_ent_t *other; |
940 | zfs_btree_index_t oidx; | |
34dc7c2f BB |
941 | |
942 | if (zap->zap_normflags == 0) | |
943 | return (B_FALSE); | |
944 | ||
9dcdee78 | 945 | for (other = zfs_btree_prev(&zap->zap_m.zap_tree, idx, &oidx); |
34dc7c2f | 946 | other && other->mze_hash == mze->mze_hash; |
9dcdee78 | 947 | other = zfs_btree_prev(&zap->zap_m.zap_tree, &oidx, &oidx)) { |
34dc7c2f BB |
948 | |
949 | if (zn == NULL) { | |
9dcdee78 AM |
950 | zn = zap_name_alloc_str(zap, |
951 | MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE); | |
34dc7c2f BB |
952 | allocdzn = B_TRUE; |
953 | } | |
428870ff | 954 | if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { |
34dc7c2f BB |
955 | if (allocdzn) |
956 | zap_name_free(zn); | |
957 | return (B_TRUE); | |
958 | } | |
959 | } | |
960 | ||
9dcdee78 AM |
961 | for (other = zfs_btree_next(&zap->zap_m.zap_tree, idx, &oidx); |
962 | other && other->mze_hash == mze->mze_hash; | |
963 | other = zfs_btree_next(&zap->zap_m.zap_tree, &oidx, &oidx)) { | |
964 | ||
965 | if (zn == NULL) { | |
966 | zn = zap_name_alloc_str(zap, | |
967 | MZE_PHYS(zap, mze)->mze_name, MT_NORMALIZE); | |
968 | allocdzn = B_TRUE; | |
969 | } | |
970 | if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { | |
971 | if (allocdzn) | |
972 | zap_name_free(zn); | |
973 | return (B_TRUE); | |
974 | } | |
34dc7c2f BB |
975 | } |
976 | ||
977 | if (allocdzn) | |
978 | zap_name_free(zn); | |
979 | return (B_FALSE); | |
980 | } | |
981 | ||
982 | /* | |
983 | * Routines for manipulating attributes. | |
984 | */ | |
985 | ||
986 | int | |
987 | zap_lookup(objset_t *os, uint64_t zapobj, const char *name, | |
988 | uint64_t integer_size, uint64_t num_integers, void *buf) | |
989 | { | |
990 | return (zap_lookup_norm(os, zapobj, name, integer_size, | |
9b7b9cd3 | 991 | num_integers, buf, 0, NULL, 0, NULL)); |
34dc7c2f BB |
992 | } |
993 | ||
8bea9815 MA |
994 | static int |
995 | zap_lookup_impl(zap_t *zap, const char *name, | |
34dc7c2f BB |
996 | uint64_t integer_size, uint64_t num_integers, void *buf, |
997 | matchtype_t mt, char *realname, int rn_len, | |
998 | boolean_t *ncp) | |
999 | { | |
8bea9815 | 1000 | int err = 0; |
34dc7c2f | 1001 | |
9dcdee78 | 1002 | zap_name_t *zn = zap_name_alloc_str(zap, name, mt); |
8bea9815 | 1003 | if (zn == NULL) |
2e528b49 | 1004 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
1005 | |
1006 | if (!zap->zap_ismicro) { | |
1007 | err = fzap_lookup(zn, integer_size, num_integers, buf, | |
1008 | realname, rn_len, ncp); | |
1009 | } else { | |
9dcdee78 AM |
1010 | zfs_btree_index_t idx; |
1011 | mzap_ent_t *mze = mze_find(zn, &idx); | |
34dc7c2f | 1012 | if (mze == NULL) { |
2e528b49 | 1013 | err = SET_ERROR(ENOENT); |
34dc7c2f BB |
1014 | } else { |
1015 | if (num_integers < 1) { | |
2e528b49 | 1016 | err = SET_ERROR(EOVERFLOW); |
34dc7c2f | 1017 | } else if (integer_size != 8) { |
2e528b49 | 1018 | err = SET_ERROR(EINVAL); |
34dc7c2f | 1019 | } else { |
428870ff BB |
1020 | *(uint64_t *)buf = |
1021 | MZE_PHYS(zap, mze)->mze_value; | |
3146fc7e RY |
1022 | if (realname != NULL) |
1023 | (void) strlcpy(realname, | |
1024 | MZE_PHYS(zap, mze)->mze_name, | |
1025 | rn_len); | |
34dc7c2f BB |
1026 | if (ncp) { |
1027 | *ncp = mzap_normalization_conflict(zap, | |
9dcdee78 | 1028 | zn, mze, &idx); |
34dc7c2f BB |
1029 | } |
1030 | } | |
1031 | } | |
1032 | } | |
1033 | zap_name_free(zn); | |
8bea9815 MA |
1034 | return (err); |
1035 | } | |
1036 | ||
1037 | int | |
1038 | zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, | |
1039 | uint64_t integer_size, uint64_t num_integers, void *buf, | |
1040 | matchtype_t mt, char *realname, int rn_len, | |
1041 | boolean_t *ncp) | |
1042 | { | |
1043 | zap_t *zap; | |
8bea9815 | 1044 | |
d2a12f9e MA |
1045 | int err = |
1046 | zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); | |
8bea9815 MA |
1047 | if (err != 0) |
1048 | return (err); | |
1049 | err = zap_lookup_impl(zap, name, integer_size, | |
1050 | num_integers, buf, mt, realname, rn_len, ncp); | |
1051 | zap_unlockdir(zap, FTAG); | |
34dc7c2f BB |
1052 | return (err); |
1053 | } | |
1054 | ||
07248450 BB |
1055 | int |
1056 | zap_prefetch(objset_t *os, uint64_t zapobj, const char *name) | |
1057 | { | |
1058 | zap_t *zap; | |
1059 | int err; | |
1060 | zap_name_t *zn; | |
1061 | ||
8bea9815 | 1062 | err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); |
07248450 BB |
1063 | if (err) |
1064 | return (err); | |
9dcdee78 | 1065 | zn = zap_name_alloc_str(zap, name, 0); |
07248450 | 1066 | if (zn == NULL) { |
8bea9815 | 1067 | zap_unlockdir(zap, FTAG); |
07248450 BB |
1068 | return (SET_ERROR(ENOTSUP)); |
1069 | } | |
1070 | ||
1071 | fzap_prefetch(zn); | |
1072 | zap_name_free(zn); | |
8bea9815 | 1073 | zap_unlockdir(zap, FTAG); |
07248450 BB |
1074 | return (err); |
1075 | } | |
1076 | ||
2bce8049 MA |
1077 | int |
1078 | zap_lookup_by_dnode(dnode_t *dn, const char *name, | |
1079 | uint64_t integer_size, uint64_t num_integers, void *buf) | |
1080 | { | |
1081 | return (zap_lookup_norm_by_dnode(dn, name, integer_size, | |
9b7b9cd3 | 1082 | num_integers, buf, 0, NULL, 0, NULL)); |
2bce8049 MA |
1083 | } |
1084 | ||
1085 | int | |
1086 | zap_lookup_norm_by_dnode(dnode_t *dn, const char *name, | |
1087 | uint64_t integer_size, uint64_t num_integers, void *buf, | |
1088 | matchtype_t mt, char *realname, int rn_len, | |
1089 | boolean_t *ncp) | |
1090 | { | |
1091 | zap_t *zap; | |
2bce8049 | 1092 | |
d2a12f9e | 1093 | int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, |
2bce8049 MA |
1094 | FTAG, &zap); |
1095 | if (err != 0) | |
1096 | return (err); | |
1097 | err = zap_lookup_impl(zap, name, integer_size, | |
1098 | num_integers, buf, mt, realname, rn_len, ncp); | |
1099 | zap_unlockdir(zap, FTAG); | |
1100 | return (err); | |
1101 | } | |
1102 | ||
428870ff BB |
1103 | int |
1104 | zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1105 | int key_numints) | |
1106 | { | |
1107 | zap_t *zap; | |
428870ff | 1108 | |
d2a12f9e MA |
1109 | int err = |
1110 | zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); | |
1111 | if (err != 0) | |
428870ff | 1112 | return (err); |
d2a12f9e | 1113 | zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); |
428870ff | 1114 | if (zn == NULL) { |
8bea9815 | 1115 | zap_unlockdir(zap, FTAG); |
2e528b49 | 1116 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
1117 | } |
1118 | ||
1119 | fzap_prefetch(zn); | |
1120 | zap_name_free(zn); | |
8bea9815 | 1121 | zap_unlockdir(zap, FTAG); |
428870ff BB |
1122 | return (err); |
1123 | } | |
1124 | ||
1125 | int | |
1126 | zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1127 | int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) | |
1128 | { | |
1129 | zap_t *zap; | |
428870ff | 1130 | |
d2a12f9e MA |
1131 | int err = |
1132 | zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); | |
1133 | if (err != 0) | |
428870ff | 1134 | return (err); |
d2a12f9e | 1135 | zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); |
428870ff | 1136 | if (zn == NULL) { |
8bea9815 | 1137 | zap_unlockdir(zap, FTAG); |
2e528b49 | 1138 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
1139 | } |
1140 | ||
1141 | err = fzap_lookup(zn, integer_size, num_integers, buf, | |
1142 | NULL, 0, NULL); | |
1143 | zap_name_free(zn); | |
8bea9815 | 1144 | zap_unlockdir(zap, FTAG); |
428870ff BB |
1145 | return (err); |
1146 | } | |
1147 | ||
1148 | int | |
1149 | zap_contains(objset_t *os, uint64_t zapobj, const char *name) | |
1150 | { | |
fa86b5db | 1151 | int err = zap_lookup_norm(os, zapobj, name, 0, |
9b7b9cd3 | 1152 | 0, NULL, 0, NULL, 0, NULL); |
428870ff BB |
1153 | if (err == EOVERFLOW || err == EINVAL) |
1154 | err = 0; /* found, but skipped reading the value */ | |
1155 | return (err); | |
1156 | } | |
1157 | ||
34dc7c2f BB |
1158 | int |
1159 | zap_length(objset_t *os, uint64_t zapobj, const char *name, | |
1160 | uint64_t *integer_size, uint64_t *num_integers) | |
1161 | { | |
1162 | zap_t *zap; | |
34dc7c2f | 1163 | |
d2a12f9e MA |
1164 | int err = |
1165 | zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); | |
1166 | if (err != 0) | |
34dc7c2f | 1167 | return (err); |
9dcdee78 | 1168 | zap_name_t *zn = zap_name_alloc_str(zap, name, 0); |
34dc7c2f | 1169 | if (zn == NULL) { |
8bea9815 | 1170 | zap_unlockdir(zap, FTAG); |
2e528b49 | 1171 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
1172 | } |
1173 | if (!zap->zap_ismicro) { | |
1174 | err = fzap_length(zn, integer_size, num_integers); | |
1175 | } else { | |
9dcdee78 AM |
1176 | zfs_btree_index_t idx; |
1177 | mzap_ent_t *mze = mze_find(zn, &idx); | |
34dc7c2f | 1178 | if (mze == NULL) { |
2e528b49 | 1179 | err = SET_ERROR(ENOENT); |
34dc7c2f BB |
1180 | } else { |
1181 | if (integer_size) | |
1182 | *integer_size = 8; | |
1183 | if (num_integers) | |
1184 | *num_integers = 1; | |
1185 | } | |
1186 | } | |
1187 | zap_name_free(zn); | |
8bea9815 | 1188 | zap_unlockdir(zap, FTAG); |
34dc7c2f BB |
1189 | return (err); |
1190 | } | |
1191 | ||
428870ff BB |
1192 | int |
1193 | zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1194 | int key_numints, uint64_t *integer_size, uint64_t *num_integers) | |
1195 | { | |
1196 | zap_t *zap; | |
428870ff | 1197 | |
d2a12f9e MA |
1198 | int err = |
1199 | zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); | |
1200 | if (err != 0) | |
428870ff | 1201 | return (err); |
d2a12f9e | 1202 | zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); |
428870ff | 1203 | if (zn == NULL) { |
8bea9815 | 1204 | zap_unlockdir(zap, FTAG); |
2e528b49 | 1205 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
1206 | } |
1207 | err = fzap_length(zn, integer_size, num_integers); | |
1208 | zap_name_free(zn); | |
8bea9815 | 1209 | zap_unlockdir(zap, FTAG); |
428870ff BB |
1210 | return (err); |
1211 | } | |
1212 | ||
34dc7c2f BB |
1213 | static void |
1214 | mzap_addent(zap_name_t *zn, uint64_t value) | |
1215 | { | |
34dc7c2f | 1216 | zap_t *zap = zn->zn_zap; |
9dcdee78 | 1217 | uint16_t start = zap->zap_m.zap_alloc_next; |
34dc7c2f | 1218 | |
34dc7c2f BB |
1219 | ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); |
1220 | ||
1221 | #ifdef ZFS_DEBUG | |
d2a12f9e MA |
1222 | for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) { |
1223 | mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; | |
428870ff | 1224 | ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); |
34dc7c2f BB |
1225 | } |
1226 | #endif | |
1227 | ||
d2a12f9e | 1228 | uint32_t cd = mze_find_unused_cd(zap, zn->zn_hash); |
34dc7c2f | 1229 | /* given the limited size of the microzap, this can't happen */ |
428870ff | 1230 | ASSERT(cd < zap_maxcd(zap)); |
34dc7c2f BB |
1231 | |
1232 | again: | |
9dcdee78 | 1233 | for (uint16_t i = start; i < zap->zap_m.zap_num_chunks; i++) { |
d683ddbb | 1234 | mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; |
34dc7c2f BB |
1235 | if (mze->mze_name[0] == 0) { |
1236 | mze->mze_value = value; | |
1237 | mze->mze_cd = cd; | |
680eada9 | 1238 | (void) strlcpy(mze->mze_name, zn->zn_key_orig, |
1239 | sizeof (mze->mze_name)); | |
34dc7c2f BB |
1240 | zap->zap_m.zap_num_entries++; |
1241 | zap->zap_m.zap_alloc_next = i+1; | |
1242 | if (zap->zap_m.zap_alloc_next == | |
1243 | zap->zap_m.zap_num_chunks) | |
1244 | zap->zap_m.zap_alloc_next = 0; | |
428870ff | 1245 | mze_insert(zap, i, zn->zn_hash); |
34dc7c2f BB |
1246 | return; |
1247 | } | |
1248 | } | |
1249 | if (start != 0) { | |
1250 | start = 0; | |
1251 | goto again; | |
1252 | } | |
989fd514 | 1253 | cmn_err(CE_PANIC, "out of entries!"); |
34dc7c2f BB |
1254 | } |
1255 | ||
0eef1bde | 1256 | static int |
1257 | zap_add_impl(zap_t *zap, const char *key, | |
34dc7c2f | 1258 | int integer_size, uint64_t num_integers, |
dd66857d | 1259 | const void *val, dmu_tx_t *tx, const void *tag) |
34dc7c2f | 1260 | { |
34dc7c2f | 1261 | const uint64_t *intval = val; |
d2a12f9e | 1262 | int err = 0; |
34dc7c2f | 1263 | |
9dcdee78 | 1264 | zap_name_t *zn = zap_name_alloc_str(zap, key, 0); |
34dc7c2f | 1265 | if (zn == NULL) { |
0eef1bde | 1266 | zap_unlockdir(zap, tag); |
2e528b49 | 1267 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
1268 | } |
1269 | if (!zap->zap_ismicro) { | |
0eef1bde | 1270 | err = fzap_add(zn, integer_size, num_integers, val, tag, tx); |
34dc7c2f BB |
1271 | zap = zn->zn_zap; /* fzap_add() may change zap */ |
1272 | } else if (integer_size != 8 || num_integers != 1 || | |
599b8648 CC |
1273 | strlen(key) >= MZAP_NAME_LEN || |
1274 | !mze_canfit_fzap_leaf(zn, zn->zn_hash)) { | |
0eef1bde | 1275 | err = mzap_upgrade(&zn->zn_zap, tag, tx, 0); |
8bea9815 MA |
1276 | if (err == 0) { |
1277 | err = fzap_add(zn, integer_size, num_integers, val, | |
0eef1bde | 1278 | tag, tx); |
8bea9815 | 1279 | } |
34dc7c2f BB |
1280 | zap = zn->zn_zap; /* fzap_add() may change zap */ |
1281 | } else { | |
9dcdee78 AM |
1282 | zfs_btree_index_t idx; |
1283 | if (mze_find(zn, &idx) != NULL) { | |
2e528b49 | 1284 | err = SET_ERROR(EEXIST); |
34dc7c2f BB |
1285 | } else { |
1286 | mzap_addent(zn, *intval); | |
1287 | } | |
1288 | } | |
1289 | ASSERT(zap == zn->zn_zap); | |
1290 | zap_name_free(zn); | |
66eead53 MA |
1291 | if (zap != NULL) /* may be NULL if fzap_add() failed */ |
1292 | zap_unlockdir(zap, tag); | |
0eef1bde | 1293 | return (err); |
1294 | } | |
1295 | ||
1296 | int | |
1297 | zap_add(objset_t *os, uint64_t zapobj, const char *key, | |
1298 | int integer_size, uint64_t num_integers, | |
1299 | const void *val, dmu_tx_t *tx) | |
1300 | { | |
1301 | zap_t *zap; | |
1302 | int err; | |
1303 | ||
1304 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); | |
1305 | if (err != 0) | |
1306 | return (err); | |
1307 | err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG); | |
1308 | /* zap_add_impl() calls zap_unlockdir() */ | |
1309 | return (err); | |
1310 | } | |
1311 | ||
1312 | int | |
1313 | zap_add_by_dnode(dnode_t *dn, const char *key, | |
1314 | int integer_size, uint64_t num_integers, | |
1315 | const void *val, dmu_tx_t *tx) | |
1316 | { | |
1317 | zap_t *zap; | |
1318 | int err; | |
1319 | ||
1320 | err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); | |
1321 | if (err != 0) | |
1322 | return (err); | |
1323 | err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG); | |
1324 | /* zap_add_impl() calls zap_unlockdir() */ | |
34dc7c2f BB |
1325 | return (err); |
1326 | } | |
1327 | ||
428870ff BB |
1328 | int |
1329 | zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1330 | int key_numints, int integer_size, uint64_t num_integers, | |
1331 | const void *val, dmu_tx_t *tx) | |
1332 | { | |
1333 | zap_t *zap; | |
428870ff | 1334 | |
d2a12f9e MA |
1335 | int err = |
1336 | zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); | |
1337 | if (err != 0) | |
428870ff | 1338 | return (err); |
d2a12f9e | 1339 | zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); |
428870ff | 1340 | if (zn == NULL) { |
8bea9815 | 1341 | zap_unlockdir(zap, FTAG); |
2e528b49 | 1342 | return (SET_ERROR(ENOTSUP)); |
428870ff | 1343 | } |
8bea9815 | 1344 | err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx); |
428870ff BB |
1345 | zap = zn->zn_zap; /* fzap_add() may change zap */ |
1346 | zap_name_free(zn); | |
1347 | if (zap != NULL) /* may be NULL if fzap_add() failed */ | |
8bea9815 | 1348 | zap_unlockdir(zap, FTAG); |
428870ff BB |
1349 | return (err); |
1350 | } | |
1351 | ||
34dc7c2f BB |
1352 | int |
1353 | zap_update(objset_t *os, uint64_t zapobj, const char *name, | |
1354 | int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) | |
1355 | { | |
1356 | zap_t *zap; | |
34dc7c2f | 1357 | const uint64_t *intval = val; |
34dc7c2f | 1358 | |
d2a12f9e MA |
1359 | int err = |
1360 | zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); | |
1361 | if (err != 0) | |
34dc7c2f | 1362 | return (err); |
9dcdee78 | 1363 | zap_name_t *zn = zap_name_alloc_str(zap, name, 0); |
34dc7c2f | 1364 | if (zn == NULL) { |
8bea9815 | 1365 | zap_unlockdir(zap, FTAG); |
2e528b49 | 1366 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
1367 | } |
1368 | if (!zap->zap_ismicro) { | |
8bea9815 MA |
1369 | err = fzap_update(zn, integer_size, num_integers, val, |
1370 | FTAG, tx); | |
34dc7c2f BB |
1371 | zap = zn->zn_zap; /* fzap_update() may change zap */ |
1372 | } else if (integer_size != 8 || num_integers != 1 || | |
1373 | strlen(name) >= MZAP_NAME_LEN) { | |
1374 | dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", | |
8e739b2c RE |
1375 | (u_longlong_t)zapobj, integer_size, |
1376 | (u_longlong_t)num_integers, name); | |
8bea9815 MA |
1377 | err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0); |
1378 | if (err == 0) { | |
34dc7c2f | 1379 | err = fzap_update(zn, integer_size, num_integers, |
8bea9815 MA |
1380 | val, FTAG, tx); |
1381 | } | |
34dc7c2f BB |
1382 | zap = zn->zn_zap; /* fzap_update() may change zap */ |
1383 | } else { | |
9dcdee78 AM |
1384 | zfs_btree_index_t idx; |
1385 | mzap_ent_t *mze = mze_find(zn, &idx); | |
34dc7c2f | 1386 | if (mze != NULL) { |
428870ff | 1387 | MZE_PHYS(zap, mze)->mze_value = *intval; |
34dc7c2f BB |
1388 | } else { |
1389 | mzap_addent(zn, *intval); | |
1390 | } | |
1391 | } | |
1392 | ASSERT(zap == zn->zn_zap); | |
1393 | zap_name_free(zn); | |
1394 | if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ | |
8bea9815 | 1395 | zap_unlockdir(zap, FTAG); |
34dc7c2f BB |
1396 | return (err); |
1397 | } | |
1398 | ||
428870ff BB |
1399 | int |
1400 | zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1401 | int key_numints, | |
1402 | int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) | |
1403 | { | |
1404 | zap_t *zap; | |
428870ff | 1405 | |
d2a12f9e MA |
1406 | int err = |
1407 | zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); | |
1408 | if (err != 0) | |
428870ff | 1409 | return (err); |
d2a12f9e | 1410 | zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); |
428870ff | 1411 | if (zn == NULL) { |
8bea9815 | 1412 | zap_unlockdir(zap, FTAG); |
2e528b49 | 1413 | return (SET_ERROR(ENOTSUP)); |
428870ff | 1414 | } |
8bea9815 | 1415 | err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx); |
428870ff BB |
1416 | zap = zn->zn_zap; /* fzap_update() may change zap */ |
1417 | zap_name_free(zn); | |
1418 | if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ | |
8bea9815 | 1419 | zap_unlockdir(zap, FTAG); |
428870ff BB |
1420 | return (err); |
1421 | } | |
1422 | ||
34dc7c2f BB |
1423 | int |
1424 | zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) | |
1425 | { | |
9b7b9cd3 | 1426 | return (zap_remove_norm(os, zapobj, name, 0, tx)); |
34dc7c2f BB |
1427 | } |
1428 | ||
0eef1bde | 1429 | static int |
1430 | zap_remove_impl(zap_t *zap, const char *name, | |
34dc7c2f BB |
1431 | matchtype_t mt, dmu_tx_t *tx) |
1432 | { | |
0eef1bde | 1433 | int err = 0; |
34dc7c2f | 1434 | |
9dcdee78 | 1435 | zap_name_t *zn = zap_name_alloc_str(zap, name, mt); |
0eef1bde | 1436 | if (zn == NULL) |
2e528b49 | 1437 | return (SET_ERROR(ENOTSUP)); |
34dc7c2f BB |
1438 | if (!zap->zap_ismicro) { |
1439 | err = fzap_remove(zn, tx); | |
1440 | } else { | |
9dcdee78 AM |
1441 | zfs_btree_index_t idx; |
1442 | mzap_ent_t *mze = mze_find(zn, &idx); | |
34dc7c2f | 1443 | if (mze == NULL) { |
2e528b49 | 1444 | err = SET_ERROR(ENOENT); |
34dc7c2f BB |
1445 | } else { |
1446 | zap->zap_m.zap_num_entries--; | |
9dcdee78 AM |
1447 | memset(MZE_PHYS(zap, mze), 0, sizeof (mzap_ent_phys_t)); |
1448 | zfs_btree_remove_idx(&zap->zap_m.zap_tree, &idx); | |
34dc7c2f BB |
1449 | } |
1450 | } | |
1451 | zap_name_free(zn); | |
0eef1bde | 1452 | return (err); |
1453 | } | |
1454 | ||
1455 | int | |
1456 | zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, | |
1457 | matchtype_t mt, dmu_tx_t *tx) | |
1458 | { | |
1459 | zap_t *zap; | |
1460 | int err; | |
1461 | ||
1462 | err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); | |
1463 | if (err) | |
1464 | return (err); | |
1465 | err = zap_remove_impl(zap, name, mt, tx); | |
1466 | zap_unlockdir(zap, FTAG); | |
1467 | return (err); | |
1468 | } | |
1469 | ||
1470 | int | |
1471 | zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx) | |
1472 | { | |
1473 | zap_t *zap; | |
1474 | int err; | |
1475 | ||
1476 | err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); | |
1477 | if (err) | |
1478 | return (err); | |
9b7b9cd3 | 1479 | err = zap_remove_impl(zap, name, 0, tx); |
8bea9815 | 1480 | zap_unlockdir(zap, FTAG); |
34dc7c2f BB |
1481 | return (err); |
1482 | } | |
1483 | ||
428870ff BB |
1484 | int |
1485 | zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, | |
1486 | int key_numints, dmu_tx_t *tx) | |
1487 | { | |
1488 | zap_t *zap; | |
428870ff | 1489 | |
d2a12f9e MA |
1490 | int err = |
1491 | zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); | |
1492 | if (err != 0) | |
428870ff | 1493 | return (err); |
d2a12f9e | 1494 | zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); |
428870ff | 1495 | if (zn == NULL) { |
8bea9815 | 1496 | zap_unlockdir(zap, FTAG); |
2e528b49 | 1497 | return (SET_ERROR(ENOTSUP)); |
428870ff BB |
1498 | } |
1499 | err = fzap_remove(zn, tx); | |
1500 | zap_name_free(zn); | |
8bea9815 | 1501 | zap_unlockdir(zap, FTAG); |
428870ff BB |
1502 | return (err); |
1503 | } | |
1504 | ||
34dc7c2f BB |
1505 | /* |
1506 | * Routines for iterating over the attributes. | |
1507 | */ | |
1508 | ||
d9b4bf06 MA |
1509 | static void |
1510 | zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, | |
1511 | uint64_t serialized, boolean_t prefetch) | |
34dc7c2f BB |
1512 | { |
1513 | zc->zc_objset = os; | |
1514 | zc->zc_zap = NULL; | |
1515 | zc->zc_leaf = NULL; | |
1516 | zc->zc_zapobj = zapobj; | |
428870ff BB |
1517 | zc->zc_serialized = serialized; |
1518 | zc->zc_hash = 0; | |
1519 | zc->zc_cd = 0; | |
d9b4bf06 MA |
1520 | zc->zc_prefetch = prefetch; |
1521 | } | |
1522 | void | |
1523 | zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, | |
1524 | uint64_t serialized) | |
1525 | { | |
1526 | zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE); | |
34dc7c2f BB |
1527 | } |
1528 | ||
d9b4bf06 MA |
1529 | /* |
1530 | * Initialize a cursor at the beginning of the ZAP object. The entire | |
1531 | * ZAP object will be prefetched. | |
1532 | */ | |
34dc7c2f BB |
1533 | void |
1534 | zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) | |
1535 | { | |
d9b4bf06 MA |
1536 | zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE); |
1537 | } | |
1538 | ||
1539 | /* | |
1540 | * Initialize a cursor at the beginning, but request that we not prefetch | |
1541 | * the entire ZAP object. | |
1542 | */ | |
1543 | void | |
1544 | zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) | |
1545 | { | |
1546 | zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE); | |
34dc7c2f BB |
1547 | } |
1548 | ||
1549 | void | |
1550 | zap_cursor_fini(zap_cursor_t *zc) | |
1551 | { | |
1552 | if (zc->zc_zap) { | |
1553 | rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); | |
8bea9815 | 1554 | zap_unlockdir(zc->zc_zap, NULL); |
34dc7c2f BB |
1555 | zc->zc_zap = NULL; |
1556 | } | |
1557 | if (zc->zc_leaf) { | |
1558 | rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); | |
1559 | zap_put_leaf(zc->zc_leaf); | |
1560 | zc->zc_leaf = NULL; | |
1561 | } | |
1562 | zc->zc_objset = NULL; | |
1563 | } | |
1564 | ||
1565 | uint64_t | |
1566 | zap_cursor_serialize(zap_cursor_t *zc) | |
1567 | { | |
1568 | if (zc->zc_hash == -1ULL) | |
1569 | return (-1ULL); | |
428870ff BB |
1570 | if (zc->zc_zap == NULL) |
1571 | return (zc->zc_serialized); | |
1572 | ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); | |
1573 | ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); | |
1574 | ||
1575 | /* | |
1576 | * We want to keep the high 32 bits of the cursor zero if we can, so | |
1577 | * that 32-bit programs can access this. So usually use a small | |
1578 | * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits | |
1579 | * of the cursor. | |
1580 | * | |
1581 | * [ collision differentiator | zap_hashbits()-bit hash value ] | |
1582 | */ | |
1583 | return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | | |
1584 | ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); | |
34dc7c2f BB |
1585 | } |
1586 | ||
1587 | int | |
1588 | zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) | |
1589 | { | |
1590 | int err; | |
34dc7c2f BB |
1591 | |
1592 | if (zc->zc_hash == -1ULL) | |
2e528b49 | 1593 | return (SET_ERROR(ENOENT)); |
34dc7c2f BB |
1594 | |
1595 | if (zc->zc_zap == NULL) { | |
428870ff | 1596 | int hb; |
34dc7c2f | 1597 | err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, |
8bea9815 | 1598 | RW_READER, TRUE, FALSE, NULL, &zc->zc_zap); |
d2a12f9e | 1599 | if (err != 0) |
34dc7c2f | 1600 | return (err); |
428870ff BB |
1601 | |
1602 | /* | |
1603 | * To support zap_cursor_init_serialized, advance, retrieve, | |
1604 | * we must add to the existing zc_cd, which may already | |
1605 | * be 1 due to the zap_cursor_advance. | |
1606 | */ | |
1607 | ASSERT(zc->zc_hash == 0); | |
1608 | hb = zap_hashbits(zc->zc_zap); | |
1609 | zc->zc_hash = zc->zc_serialized << (64 - hb); | |
1610 | zc->zc_cd += zc->zc_serialized >> hb; | |
1611 | if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ | |
1612 | zc->zc_cd = 0; | |
34dc7c2f BB |
1613 | } else { |
1614 | rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); | |
1615 | } | |
1616 | if (!zc->zc_zap->zap_ismicro) { | |
1617 | err = fzap_cursor_retrieve(zc->zc_zap, zc, za); | |
1618 | } else { | |
9dcdee78 | 1619 | zfs_btree_index_t idx; |
d2a12f9e MA |
1620 | mzap_ent_t mze_tofind; |
1621 | ||
9dcdee78 | 1622 | mze_tofind.mze_hash = zc->zc_hash >> 32; |
428870ff | 1623 | mze_tofind.mze_cd = zc->zc_cd; |
34dc7c2f | 1624 | |
9dcdee78 AM |
1625 | mzap_ent_t *mze = zfs_btree_find(&zc->zc_zap->zap_m.zap_tree, |
1626 | &mze_tofind, &idx); | |
34dc7c2f | 1627 | if (mze == NULL) { |
9dcdee78 AM |
1628 | mze = zfs_btree_next(&zc->zc_zap->zap_m.zap_tree, |
1629 | &idx, &idx); | |
34dc7c2f BB |
1630 | } |
1631 | if (mze) { | |
428870ff BB |
1632 | mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); |
1633 | ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); | |
34dc7c2f | 1634 | za->za_normalization_conflict = |
9dcdee78 AM |
1635 | mzap_normalization_conflict(zc->zc_zap, NULL, |
1636 | mze, &idx); | |
34dc7c2f BB |
1637 | za->za_integer_length = 8; |
1638 | za->za_num_integers = 1; | |
428870ff | 1639 | za->za_first_integer = mzep->mze_value; |
c9e319fa JL |
1640 | (void) strlcpy(za->za_name, mzep->mze_name, |
1641 | sizeof (za->za_name)); | |
9dcdee78 | 1642 | zc->zc_hash = (uint64_t)mze->mze_hash << 32; |
428870ff | 1643 | zc->zc_cd = mze->mze_cd; |
34dc7c2f BB |
1644 | err = 0; |
1645 | } else { | |
1646 | zc->zc_hash = -1ULL; | |
2e528b49 | 1647 | err = SET_ERROR(ENOENT); |
34dc7c2f BB |
1648 | } |
1649 | } | |
1650 | rw_exit(&zc->zc_zap->zap_rwlock); | |
1651 | return (err); | |
1652 | } | |
1653 | ||
1654 | void | |
1655 | zap_cursor_advance(zap_cursor_t *zc) | |
1656 | { | |
1657 | if (zc->zc_hash == -1ULL) | |
1658 | return; | |
1659 | zc->zc_cd++; | |
428870ff BB |
1660 | } |
1661 | ||
34dc7c2f BB |
1662 | int |
1663 | zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) | |
1664 | { | |
34dc7c2f BB |
1665 | zap_t *zap; |
1666 | ||
d2a12f9e MA |
1667 | int err = |
1668 | zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); | |
1669 | if (err != 0) | |
34dc7c2f BB |
1670 | return (err); |
1671 | ||
861166b0 | 1672 | memset(zs, 0, sizeof (zap_stats_t)); |
34dc7c2f BB |
1673 | |
1674 | if (zap->zap_ismicro) { | |
1675 | zs->zs_blocksize = zap->zap_dbuf->db_size; | |
1676 | zs->zs_num_entries = zap->zap_m.zap_num_entries; | |
1677 | zs->zs_num_blocks = 1; | |
1678 | } else { | |
1679 | fzap_get_stats(zap, zs); | |
1680 | } | |
8bea9815 | 1681 | zap_unlockdir(zap, FTAG); |
34dc7c2f BB |
1682 | return (0); |
1683 | } | |
9babb374 | 1684 | |
/*
 * Kernel-only section (compiled when _KERNEL is defined).
 *
 * Each ZAP entry point listed below is passed to EXPORT_SYMBOL(), and
 * zap_micro_max_size is registered through ZFS_MODULE_PARAM() as an INT
 * parameter with the ZMOD_RW flag and the description string given in
 * the macro invocation.
 * NOTE(review): ZMOD_RW presumably means the parameter is writable at
 * runtime -- confirm against the ZFS_MODULE_PARAM definition.
 */
93ce2b4c | 1685 | #if defined(_KERNEL) |
c28b2279 | 1686 | EXPORT_SYMBOL(zap_create); |
50c957f7 | 1687 | EXPORT_SYMBOL(zap_create_dnsize); |
dee28b07 | 1688 | EXPORT_SYMBOL(zap_create_norm); |
50c957f7 | 1689 | EXPORT_SYMBOL(zap_create_norm_dnsize); |
dee28b07 | 1690 | EXPORT_SYMBOL(zap_create_flags); |
50c957f7 | 1691 | EXPORT_SYMBOL(zap_create_flags_dnsize); |
dee28b07 BB |
1692 | EXPORT_SYMBOL(zap_create_claim); |
1693 | EXPORT_SYMBOL(zap_create_claim_norm); | |
50c957f7 | 1694 | EXPORT_SYMBOL(zap_create_claim_norm_dnsize); |
6955b401 | 1695 | EXPORT_SYMBOL(zap_create_hold); |
dee28b07 | 1696 | EXPORT_SYMBOL(zap_destroy); |
c28b2279 | 1697 | EXPORT_SYMBOL(zap_lookup); |
0eef1bde | 1698 | EXPORT_SYMBOL(zap_lookup_by_dnode); |
c28b2279 | 1699 | EXPORT_SYMBOL(zap_lookup_norm); |
dee28b07 BB |
1700 | EXPORT_SYMBOL(zap_lookup_uint64); |
1701 | EXPORT_SYMBOL(zap_contains); | |
07248450 | 1702 | EXPORT_SYMBOL(zap_prefetch); |
dee28b07 | 1703 | EXPORT_SYMBOL(zap_prefetch_uint64); |
dee28b07 | 1704 | EXPORT_SYMBOL(zap_add); |
0eef1bde | 1705 | EXPORT_SYMBOL(zap_add_by_dnode); |
dee28b07 | 1706 | EXPORT_SYMBOL(zap_add_uint64); |
c28b2279 | 1707 | EXPORT_SYMBOL(zap_update); |
dee28b07 BB |
1708 | EXPORT_SYMBOL(zap_update_uint64); |
1709 | EXPORT_SYMBOL(zap_length); | |
1710 | EXPORT_SYMBOL(zap_length_uint64); | |
1711 | EXPORT_SYMBOL(zap_remove); | |
0eef1bde | 1712 | EXPORT_SYMBOL(zap_remove_by_dnode); |
dee28b07 BB |
1713 | EXPORT_SYMBOL(zap_remove_norm); |
1714 | EXPORT_SYMBOL(zap_remove_uint64); | |
1715 | EXPORT_SYMBOL(zap_count); | |
1716 | EXPORT_SYMBOL(zap_value_search); | |
1717 | EXPORT_SYMBOL(zap_join); | |
1718 | EXPORT_SYMBOL(zap_join_increment); | |
1719 | EXPORT_SYMBOL(zap_add_int); | |
1720 | EXPORT_SYMBOL(zap_remove_int); | |
1721 | EXPORT_SYMBOL(zap_lookup_int); | |
1722 | EXPORT_SYMBOL(zap_increment_int); | |
1723 | EXPORT_SYMBOL(zap_add_int_key); | |
1724 | EXPORT_SYMBOL(zap_lookup_int_key); | |
1725 | EXPORT_SYMBOL(zap_increment); | |
1726 | EXPORT_SYMBOL(zap_cursor_init); | |
1727 | EXPORT_SYMBOL(zap_cursor_fini); | |
1728 | EXPORT_SYMBOL(zap_cursor_retrieve); | |
1729 | EXPORT_SYMBOL(zap_cursor_advance); | |
1730 | EXPORT_SYMBOL(zap_cursor_serialize); | |
dee28b07 BB |
1731 | EXPORT_SYMBOL(zap_cursor_init_serialized); |
1732 | EXPORT_SYMBOL(zap_get_stats); | |
a4b21ead MP |
1733 | |
1734 | /* CSTYLED */ | |
1735 | ZFS_MODULE_PARAM(zfs, , zap_micro_max_size, INT, ZMOD_RW, | |
1736 | "Maximum micro ZAP size, before converting to a fat ZAP, in bytes"); | |
c28b2279 | 1737 | #endif |