]> git.proxmox.com Git - mirror_zfs.git/blame - module/zfs/zap_leaf.c
Rebase master to b117
[mirror_zfs.git] / module / zfs / zap_leaf.c
CommitLineData
34dc7c2f
BB
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
9babb374 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
34dc7c2f
BB
23 * Use is subject to license terms.
24 */
25
34dc7c2f
BB
26/*
27 * The 512-byte leaf is broken into 32 16-byte chunks.
28 * chunk number n means l_chunk[n], even though the header precedes it.
29 * the names are stored null-terminated.
30 */
31
9babb374
BB
32#include <sys/spa.h>
33#include <sys/dmu.h>
34dc7c2f 34#include <sys/zfs_context.h>
9babb374 35#include <sys/fs/zfs.h>
34dc7c2f
BB
36#include <sys/zap.h>
37#include <sys/zap_impl.h>
38#include <sys/zap_leaf.h>
34dc7c2f
BB
39
40static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
41
42#define CHAIN_END 0xffff /* end of the chunk chain */
43
44/* half the (current) minimum block size */
45#define MAX_ARRAY_BYTES (8<<10)
46
47#define LEAF_HASH(l, h) \
48 ((ZAP_LEAF_HASH_NUMENTRIES(l)-1) & \
49 ((h) >> (64 - ZAP_LEAF_HASH_SHIFT(l)-(l)->l_phys->l_hdr.lh_prefix_len)))
50
51#define LEAF_HASH_ENTPTR(l, h) (&(l)->l_phys->l_hash[LEAF_HASH(l, h)])
52
53
54static void
55zap_memset(void *a, int c, size_t n)
56{
57 char *cp = a;
58 char *cpend = cp + n;
59
60 while (cp < cpend)
61 *cp++ = c;
62}
63
64static void
65stv(int len, void *addr, uint64_t value)
66{
67 switch (len) {
68 case 1:
69 *(uint8_t *)addr = value;
70 return;
71 case 2:
72 *(uint16_t *)addr = value;
73 return;
74 case 4:
75 *(uint32_t *)addr = value;
76 return;
77 case 8:
78 *(uint64_t *)addr = value;
79 return;
80 }
81 ASSERT(!"bad int len");
82}
83
84static uint64_t
85ldv(int len, const void *addr)
86{
87 switch (len) {
88 case 1:
89 return (*(uint8_t *)addr);
90 case 2:
91 return (*(uint16_t *)addr);
92 case 4:
93 return (*(uint32_t *)addr);
94 case 8:
95 return (*(uint64_t *)addr);
96 }
97 ASSERT(!"bad int len");
98 return (0xFEEDFACEDEADBEEFULL);
99}
100
101void
102zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
103{
104 int i;
105 zap_leaf_t l;
106 l.l_bs = highbit(size)-1;
107 l.l_phys = buf;
108
109 buf->l_hdr.lh_block_type = BSWAP_64(buf->l_hdr.lh_block_type);
110 buf->l_hdr.lh_prefix = BSWAP_64(buf->l_hdr.lh_prefix);
111 buf->l_hdr.lh_magic = BSWAP_32(buf->l_hdr.lh_magic);
112 buf->l_hdr.lh_nfree = BSWAP_16(buf->l_hdr.lh_nfree);
113 buf->l_hdr.lh_nentries = BSWAP_16(buf->l_hdr.lh_nentries);
114 buf->l_hdr.lh_prefix_len = BSWAP_16(buf->l_hdr.lh_prefix_len);
115 buf->l_hdr.lh_freelist = BSWAP_16(buf->l_hdr.lh_freelist);
116
117 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
118 buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
119
120 for (i = 0; i < ZAP_LEAF_NUMCHUNKS(&l); i++) {
121 zap_leaf_chunk_t *lc = &ZAP_LEAF_CHUNK(&l, i);
122 struct zap_leaf_entry *le;
123
124 switch (lc->l_free.lf_type) {
125 case ZAP_CHUNK_ENTRY:
126 le = &lc->l_entry;
127
128 le->le_type = BSWAP_8(le->le_type);
129 le->le_int_size = BSWAP_8(le->le_int_size);
130 le->le_next = BSWAP_16(le->le_next);
131 le->le_name_chunk = BSWAP_16(le->le_name_chunk);
132 le->le_name_length = BSWAP_16(le->le_name_length);
133 le->le_value_chunk = BSWAP_16(le->le_value_chunk);
134 le->le_value_length = BSWAP_16(le->le_value_length);
135 le->le_cd = BSWAP_32(le->le_cd);
136 le->le_hash = BSWAP_64(le->le_hash);
137 break;
138 case ZAP_CHUNK_FREE:
139 lc->l_free.lf_type = BSWAP_8(lc->l_free.lf_type);
140 lc->l_free.lf_next = BSWAP_16(lc->l_free.lf_next);
141 break;
142 case ZAP_CHUNK_ARRAY:
143 lc->l_array.la_type = BSWAP_8(lc->l_array.la_type);
144 lc->l_array.la_next = BSWAP_16(lc->l_array.la_next);
145 /* la_array doesn't need swapping */
146 break;
147 default:
148 ASSERT(!"bad leaf type");
149 }
150 }
151}
152
153void
154zap_leaf_init(zap_leaf_t *l, boolean_t sort)
155{
156 int i;
157
158 l->l_bs = highbit(l->l_dbuf->db_size)-1;
159 zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
160 zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
161 for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
162 ZAP_LEAF_CHUNK(l, i).l_free.lf_type = ZAP_CHUNK_FREE;
163 ZAP_LEAF_CHUNK(l, i).l_free.lf_next = i+1;
164 }
165 ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)-1).l_free.lf_next = CHAIN_END;
166 l->l_phys->l_hdr.lh_block_type = ZBT_LEAF;
167 l->l_phys->l_hdr.lh_magic = ZAP_LEAF_MAGIC;
168 l->l_phys->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l);
169 if (sort)
170 l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
171}
172
173/*
174 * Routines which manipulate leaf chunks (l_chunk[]).
175 */
176
177static uint16_t
178zap_leaf_chunk_alloc(zap_leaf_t *l)
179{
180 int chunk;
181
182 ASSERT(l->l_phys->l_hdr.lh_nfree > 0);
183
184 chunk = l->l_phys->l_hdr.lh_freelist;
185 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
186 ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_free.lf_type, ==, ZAP_CHUNK_FREE);
187
188 l->l_phys->l_hdr.lh_freelist = ZAP_LEAF_CHUNK(l, chunk).l_free.lf_next;
189
190 l->l_phys->l_hdr.lh_nfree--;
191
192 return (chunk);
193}
194
195static void
196zap_leaf_chunk_free(zap_leaf_t *l, uint16_t chunk)
197{
198 struct zap_leaf_free *zlf = &ZAP_LEAF_CHUNK(l, chunk).l_free;
199 ASSERT3U(l->l_phys->l_hdr.lh_nfree, <, ZAP_LEAF_NUMCHUNKS(l));
200 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
201 ASSERT(zlf->lf_type != ZAP_CHUNK_FREE);
202
203 zlf->lf_type = ZAP_CHUNK_FREE;
204 zlf->lf_next = l->l_phys->l_hdr.lh_freelist;
205 bzero(zlf->lf_pad, sizeof (zlf->lf_pad)); /* help it to compress */
206 l->l_phys->l_hdr.lh_freelist = chunk;
207
208 l->l_phys->l_hdr.lh_nfree++;
209}
210
211/*
212 * Routines which manipulate leaf arrays (zap_leaf_array type chunks).
213 */
214
215static uint16_t
216zap_leaf_array_create(zap_leaf_t *l, const char *buf,
217 int integer_size, int num_integers)
218{
219 uint16_t chunk_head;
220 uint16_t *chunkp = &chunk_head;
221 int byten = 0;
222 uint64_t value;
223 int shift = (integer_size-1)*8;
224 int len = num_integers;
225
226 ASSERT3U(num_integers * integer_size, <, MAX_ARRAY_BYTES);
227
228 while (len > 0) {
229 uint16_t chunk = zap_leaf_chunk_alloc(l);
230 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
231 int i;
232
233 la->la_type = ZAP_CHUNK_ARRAY;
234 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES; i++) {
235 if (byten == 0)
236 value = ldv(integer_size, buf);
237 la->la_array[i] = value >> shift;
238 value <<= 8;
239 if (++byten == integer_size) {
240 byten = 0;
241 buf += integer_size;
242 if (--len == 0)
243 break;
244 }
245 }
246
247 *chunkp = chunk;
248 chunkp = &la->la_next;
249 }
250 *chunkp = CHAIN_END;
251
252 return (chunk_head);
253}
254
255static void
256zap_leaf_array_free(zap_leaf_t *l, uint16_t *chunkp)
257{
258 uint16_t chunk = *chunkp;
259
260 *chunkp = CHAIN_END;
261
262 while (chunk != CHAIN_END) {
263 int nextchunk = ZAP_LEAF_CHUNK(l, chunk).l_array.la_next;
264 ASSERT3U(ZAP_LEAF_CHUNK(l, chunk).l_array.la_type, ==,
265 ZAP_CHUNK_ARRAY);
266 zap_leaf_chunk_free(l, chunk);
267 chunk = nextchunk;
268 }
269}
270
271/* array_len and buf_len are in integers, not bytes */
272static void
273zap_leaf_array_read(zap_leaf_t *l, uint16_t chunk,
274 int array_int_len, int array_len, int buf_int_len, uint64_t buf_len,
275 char *buf)
276{
277 int len = MIN(array_len, buf_len);
278 int byten = 0;
279 uint64_t value = 0;
280
281 ASSERT3U(array_int_len, <=, buf_int_len);
282
283 /* Fast path for one 8-byte integer */
284 if (array_int_len == 8 && buf_int_len == 8 && len == 1) {
285 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
286 uint8_t *ip = la->la_array;
287 uint64_t *buf64 = (uint64_t *)buf;
288
289 *buf64 = (uint64_t)ip[0] << 56 | (uint64_t)ip[1] << 48 |
290 (uint64_t)ip[2] << 40 | (uint64_t)ip[3] << 32 |
291 (uint64_t)ip[4] << 24 | (uint64_t)ip[5] << 16 |
292 (uint64_t)ip[6] << 8 | (uint64_t)ip[7];
293 return;
294 }
295
296 /* Fast path for an array of 1-byte integers (eg. the entry name) */
297 if (array_int_len == 1 && buf_int_len == 1 &&
298 buf_len > array_len + ZAP_LEAF_ARRAY_BYTES) {
299 while (chunk != CHAIN_END) {
300 struct zap_leaf_array *la =
301 &ZAP_LEAF_CHUNK(l, chunk).l_array;
302 bcopy(la->la_array, buf, ZAP_LEAF_ARRAY_BYTES);
303 buf += ZAP_LEAF_ARRAY_BYTES;
304 chunk = la->la_next;
305 }
306 return;
307 }
308
309 while (len > 0) {
310 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
311 int i;
312
313 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
314 for (i = 0; i < ZAP_LEAF_ARRAY_BYTES && len > 0; i++) {
315 value = (value << 8) | la->la_array[i];
316 byten++;
317 if (byten == array_int_len) {
318 stv(buf_int_len, buf, value);
319 byten = 0;
320 len--;
321 if (len == 0)
322 return;
323 buf += buf_int_len;
324 }
325 }
326 chunk = la->la_next;
327 }
328}
329
330/*
331 * Only to be used on 8-bit arrays.
332 * array_len is actual len in bytes (not encoded le_value_length).
333 * namenorm is null-terminated.
334 */
335static boolean_t
336zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn, int chunk, int array_len)
337{
338 int bseen = 0;
339
340 if (zn->zn_matchtype == MT_FIRST) {
341 char *thisname = kmem_alloc(array_len, KM_SLEEP);
342 boolean_t match;
343
344 zap_leaf_array_read(l, chunk, 1, array_len, 1,
345 array_len, thisname);
346 match = zap_match(zn, thisname);
347 kmem_free(thisname, array_len);
348 return (match);
349 }
350
351 /* Fast path for exact matching */
352 while (bseen < array_len) {
353 struct zap_leaf_array *la = &ZAP_LEAF_CHUNK(l, chunk).l_array;
354 int toread = MIN(array_len - bseen, ZAP_LEAF_ARRAY_BYTES);
355 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
356 if (bcmp(la->la_array, zn->zn_name_orij + bseen, toread))
357 break;
358 chunk = la->la_next;
359 bseen += toread;
360 }
361 return (bseen == array_len);
362}
363
364/*
365 * Routines which manipulate leaf entries.
366 */
367
368int
369zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
370{
371 uint16_t *chunkp;
372 struct zap_leaf_entry *le;
373
374 ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
375
376again:
377 for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
378 *chunkp != CHAIN_END; chunkp = &le->le_next) {
379 uint16_t chunk = *chunkp;
380 le = ZAP_LEAF_ENTRY(l, chunk);
381
382 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
383 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
384
385 if (le->le_hash != zn->zn_hash)
386 continue;
387
388 /*
389 * NB: the entry chain is always sorted by cd on
390 * normalized zap objects, so this will find the
391 * lowest-cd match for MT_FIRST.
392 */
393 ASSERT(zn->zn_matchtype == MT_EXACT ||
394 (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
395 if (zap_leaf_array_match(l, zn, le->le_name_chunk,
396 le->le_name_length)) {
397 zeh->zeh_num_integers = le->le_value_length;
398 zeh->zeh_integer_size = le->le_int_size;
399 zeh->zeh_cd = le->le_cd;
400 zeh->zeh_hash = le->le_hash;
401 zeh->zeh_chunkp = chunkp;
402 zeh->zeh_leaf = l;
403 return (0);
404 }
405 }
406
407 /*
408 * NB: we could of course do this in one pass, but that would be
409 * a pain. We'll see if MT_BEST is even used much.
410 */
411 if (zn->zn_matchtype == MT_BEST) {
412 zn->zn_matchtype = MT_FIRST;
413 goto again;
414 }
415
416 return (ENOENT);
417}
418
419/* Return (h1,cd1 >= h2,cd2) */
420#define HCD_GTEQ(h1, cd1, h2, cd2) \
421 ((h1 > h2) ? TRUE : ((h1 == h2 && cd1 >= cd2) ? TRUE : FALSE))
422
423int
424zap_leaf_lookup_closest(zap_leaf_t *l,
425 uint64_t h, uint32_t cd, zap_entry_handle_t *zeh)
426{
427 uint16_t chunk;
428 uint64_t besth = -1ULL;
429 uint32_t bestcd = ZAP_MAXCD;
430 uint16_t bestlh = ZAP_LEAF_HASH_NUMENTRIES(l)-1;
431 uint16_t lh;
432 struct zap_leaf_entry *le;
433
434 ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
435
436 for (lh = LEAF_HASH(l, h); lh <= bestlh; lh++) {
437 for (chunk = l->l_phys->l_hash[lh];
438 chunk != CHAIN_END; chunk = le->le_next) {
439 le = ZAP_LEAF_ENTRY(l, chunk);
440
441 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
442 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
443
444 if (HCD_GTEQ(le->le_hash, le->le_cd, h, cd) &&
445 HCD_GTEQ(besth, bestcd, le->le_hash, le->le_cd)) {
446 ASSERT3U(bestlh, >=, lh);
447 bestlh = lh;
448 besth = le->le_hash;
449 bestcd = le->le_cd;
450
451 zeh->zeh_num_integers = le->le_value_length;
452 zeh->zeh_integer_size = le->le_int_size;
453 zeh->zeh_cd = le->le_cd;
454 zeh->zeh_hash = le->le_hash;
455 zeh->zeh_fakechunk = chunk;
456 zeh->zeh_chunkp = &zeh->zeh_fakechunk;
457 zeh->zeh_leaf = l;
458 }
459 }
460 }
461
462 return (bestcd == ZAP_MAXCD ? ENOENT : 0);
463}
464
465int
466zap_entry_read(const zap_entry_handle_t *zeh,
467 uint8_t integer_size, uint64_t num_integers, void *buf)
468{
469 struct zap_leaf_entry *le =
470 ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
471 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
472
473 if (le->le_int_size > integer_size)
474 return (EINVAL);
475
476 zap_leaf_array_read(zeh->zeh_leaf, le->le_value_chunk, le->le_int_size,
477 le->le_value_length, integer_size, num_integers, buf);
478
479 if (zeh->zeh_num_integers > num_integers)
480 return (EOVERFLOW);
481 return (0);
482
483}
484
485int
486zap_entry_read_name(const zap_entry_handle_t *zeh, uint16_t buflen, char *buf)
487{
488 struct zap_leaf_entry *le =
489 ZAP_LEAF_ENTRY(zeh->zeh_leaf, *zeh->zeh_chunkp);
490 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
491
492 zap_leaf_array_read(zeh->zeh_leaf, le->le_name_chunk, 1,
493 le->le_name_length, 1, buflen, buf);
494 if (le->le_name_length > buflen)
495 return (EOVERFLOW);
496 return (0);
497}
498
499int
500zap_entry_update(zap_entry_handle_t *zeh,
501 uint8_t integer_size, uint64_t num_integers, const void *buf)
502{
503 int delta_chunks;
504 zap_leaf_t *l = zeh->zeh_leaf;
505 struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, *zeh->zeh_chunkp);
506
507 delta_chunks = ZAP_LEAF_ARRAY_NCHUNKS(num_integers * integer_size) -
508 ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length * le->le_int_size);
509
510 if ((int)l->l_phys->l_hdr.lh_nfree < delta_chunks)
511 return (EAGAIN);
512
513 /*
514 * We should search other chained leaves (via
515 * zap_entry_remove,create?) otherwise returning EAGAIN will
516 * just send us into an infinite loop if we have to chain
517 * another leaf block, rather than being able to split this
518 * block.
519 */
520
521 zap_leaf_array_free(l, &le->le_value_chunk);
522 le->le_value_chunk =
523 zap_leaf_array_create(l, buf, integer_size, num_integers);
524 le->le_value_length = num_integers;
525 le->le_int_size = integer_size;
526 return (0);
527}
528
529void
530zap_entry_remove(zap_entry_handle_t *zeh)
531{
532 uint16_t entry_chunk;
533 struct zap_leaf_entry *le;
534 zap_leaf_t *l = zeh->zeh_leaf;
535
536 ASSERT3P(zeh->zeh_chunkp, !=, &zeh->zeh_fakechunk);
537
538 entry_chunk = *zeh->zeh_chunkp;
539 le = ZAP_LEAF_ENTRY(l, entry_chunk);
540 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
541
542 zap_leaf_array_free(l, &le->le_name_chunk);
543 zap_leaf_array_free(l, &le->le_value_chunk);
544
545 *zeh->zeh_chunkp = le->le_next;
546 zap_leaf_chunk_free(l, entry_chunk);
547
548 l->l_phys->l_hdr.lh_nentries--;
549}
550
551int
552zap_entry_create(zap_leaf_t *l, const char *name, uint64_t h, uint32_t cd,
553 uint8_t integer_size, uint64_t num_integers, const void *buf,
554 zap_entry_handle_t *zeh)
555{
556 uint16_t chunk;
557 uint16_t *chunkp;
558 struct zap_leaf_entry *le;
559 uint64_t namelen, valuelen;
560 int numchunks;
561
562 valuelen = integer_size * num_integers;
563 namelen = strlen(name) + 1;
564 ASSERT(namelen >= 2);
565
566 numchunks = 1 + ZAP_LEAF_ARRAY_NCHUNKS(namelen) +
567 ZAP_LEAF_ARRAY_NCHUNKS(valuelen);
568 if (numchunks > ZAP_LEAF_NUMCHUNKS(l))
569 return (E2BIG);
570
571 if (cd == ZAP_MAXCD) {
572 /* find the lowest unused cd */
573 if (l->l_phys->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED) {
574 cd = 0;
575
576 for (chunk = *LEAF_HASH_ENTPTR(l, h);
577 chunk != CHAIN_END; chunk = le->le_next) {
578 le = ZAP_LEAF_ENTRY(l, chunk);
579 if (le->le_cd > cd)
580 break;
581 if (le->le_hash == h) {
582 ASSERT3U(cd, ==, le->le_cd);
583 cd++;
584 }
585 }
586 } else {
587 /* old unsorted format; do it the O(n^2) way */
588 for (cd = 0; cd < ZAP_MAXCD; cd++) {
589 for (chunk = *LEAF_HASH_ENTPTR(l, h);
590 chunk != CHAIN_END; chunk = le->le_next) {
591 le = ZAP_LEAF_ENTRY(l, chunk);
592 if (le->le_hash == h &&
593 le->le_cd == cd) {
594 break;
595 }
596 }
597 /* If this cd is not in use, we are good. */
598 if (chunk == CHAIN_END)
599 break;
600 }
601 }
602 /*
603 * we would run out of space in a block before we could
604 * have ZAP_MAXCD entries
605 */
606 ASSERT3U(cd, <, ZAP_MAXCD);
607 }
608
609 if (l->l_phys->l_hdr.lh_nfree < numchunks)
610 return (EAGAIN);
611
612 /* make the entry */
613 chunk = zap_leaf_chunk_alloc(l);
614 le = ZAP_LEAF_ENTRY(l, chunk);
615 le->le_type = ZAP_CHUNK_ENTRY;
616 le->le_name_chunk = zap_leaf_array_create(l, name, 1, namelen);
617 le->le_name_length = namelen;
618 le->le_value_chunk =
619 zap_leaf_array_create(l, buf, integer_size, num_integers);
620 le->le_value_length = num_integers;
621 le->le_int_size = integer_size;
622 le->le_hash = h;
623 le->le_cd = cd;
624
625 /* link it into the hash chain */
626 /* XXX if we did the search above, we could just use that */
627 chunkp = zap_leaf_rehash_entry(l, chunk);
628
629 l->l_phys->l_hdr.lh_nentries++;
630
631 zeh->zeh_leaf = l;
632 zeh->zeh_num_integers = num_integers;
633 zeh->zeh_integer_size = le->le_int_size;
634 zeh->zeh_cd = le->le_cd;
635 zeh->zeh_hash = le->le_hash;
636 zeh->zeh_chunkp = chunkp;
637
638 return (0);
639}
640
641/*
642 * Determine if there is another entry with the same normalized form.
643 * For performance purposes, either zn or name must be provided (the
644 * other can be NULL). Note, there usually won't be any hash
645 * conflicts, in which case we don't need the concatenated/normalized
646 * form of the name. But all callers have one of these on hand anyway,
647 * so might as well take advantage. A cleaner but slower interface
648 * would accept neither argument, and compute the normalized name as
649 * needed (using zap_name_alloc(zap_entry_read_name(zeh))).
650 */
651boolean_t
652zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
653 const char *name, zap_t *zap)
654{
655 uint64_t chunk;
656 struct zap_leaf_entry *le;
657 boolean_t allocdzn = B_FALSE;
658
659 if (zap->zap_normflags == 0)
660 return (B_FALSE);
661
662 for (chunk = *LEAF_HASH_ENTPTR(zeh->zeh_leaf, zeh->zeh_hash);
663 chunk != CHAIN_END; chunk = le->le_next) {
664 le = ZAP_LEAF_ENTRY(zeh->zeh_leaf, chunk);
665 if (le->le_hash != zeh->zeh_hash)
666 continue;
667 if (le->le_cd == zeh->zeh_cd)
668 continue;
669
670 if (zn == NULL) {
671 zn = zap_name_alloc(zap, name, MT_FIRST);
672 allocdzn = B_TRUE;
673 }
674 if (zap_leaf_array_match(zeh->zeh_leaf, zn,
675 le->le_name_chunk, le->le_name_length)) {
676 if (allocdzn)
677 zap_name_free(zn);
678 return (B_TRUE);
679 }
680 }
681 if (allocdzn)
682 zap_name_free(zn);
683 return (B_FALSE);
684}
685
686/*
687 * Routines for transferring entries between leafs.
688 */
689
690static uint16_t *
691zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry)
692{
693 struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, entry);
694 struct zap_leaf_entry *le2;
695 uint16_t *chunkp;
696
697 /*
698 * keep the entry chain sorted by cd
699 * NB: this will not cause problems for unsorted leafs, though
700 * it is unnecessary there.
701 */
702 for (chunkp = LEAF_HASH_ENTPTR(l, le->le_hash);
703 *chunkp != CHAIN_END; chunkp = &le2->le_next) {
704 le2 = ZAP_LEAF_ENTRY(l, *chunkp);
705 if (le2->le_cd > le->le_cd)
706 break;
707 }
708
709 le->le_next = *chunkp;
710 *chunkp = entry;
711 return (chunkp);
712}
713
714static uint16_t
715zap_leaf_transfer_array(zap_leaf_t *l, uint16_t chunk, zap_leaf_t *nl)
716{
717 uint16_t new_chunk;
718 uint16_t *nchunkp = &new_chunk;
719
720 while (chunk != CHAIN_END) {
721 uint16_t nchunk = zap_leaf_chunk_alloc(nl);
722 struct zap_leaf_array *nla =
723 &ZAP_LEAF_CHUNK(nl, nchunk).l_array;
724 struct zap_leaf_array *la =
725 &ZAP_LEAF_CHUNK(l, chunk).l_array;
726 int nextchunk = la->la_next;
727
728 ASSERT3U(chunk, <, ZAP_LEAF_NUMCHUNKS(l));
729 ASSERT3U(nchunk, <, ZAP_LEAF_NUMCHUNKS(l));
730
731 *nla = *la; /* structure assignment */
732
733 zap_leaf_chunk_free(l, chunk);
734 chunk = nextchunk;
735 *nchunkp = nchunk;
736 nchunkp = &nla->la_next;
737 }
738 *nchunkp = CHAIN_END;
739 return (new_chunk);
740}
741
742static void
743zap_leaf_transfer_entry(zap_leaf_t *l, int entry, zap_leaf_t *nl)
744{
745 struct zap_leaf_entry *le, *nle;
746 uint16_t chunk;
747
748 le = ZAP_LEAF_ENTRY(l, entry);
749 ASSERT3U(le->le_type, ==, ZAP_CHUNK_ENTRY);
750
751 chunk = zap_leaf_chunk_alloc(nl);
752 nle = ZAP_LEAF_ENTRY(nl, chunk);
753 *nle = *le; /* structure assignment */
754
755 (void) zap_leaf_rehash_entry(nl, chunk);
756
757 nle->le_name_chunk = zap_leaf_transfer_array(l, le->le_name_chunk, nl);
758 nle->le_value_chunk =
759 zap_leaf_transfer_array(l, le->le_value_chunk, nl);
760
761 zap_leaf_chunk_free(l, entry);
762
763 l->l_phys->l_hdr.lh_nentries--;
764 nl->l_phys->l_hdr.lh_nentries++;
765}
766
767/*
768 * Transfer the entries whose hash prefix ends in 1 to the new leaf.
769 */
770void
771zap_leaf_split(zap_leaf_t *l, zap_leaf_t *nl, boolean_t sort)
772{
773 int i;
774 int bit = 64 - 1 - l->l_phys->l_hdr.lh_prefix_len;
775
776 /* set new prefix and prefix_len */
777 l->l_phys->l_hdr.lh_prefix <<= 1;
778 l->l_phys->l_hdr.lh_prefix_len++;
779 nl->l_phys->l_hdr.lh_prefix = l->l_phys->l_hdr.lh_prefix | 1;
780 nl->l_phys->l_hdr.lh_prefix_len = l->l_phys->l_hdr.lh_prefix_len;
781
782 /* break existing hash chains */
783 zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
784
785 if (sort)
786 l->l_phys->l_hdr.lh_flags |= ZLF_ENTRIES_CDSORTED;
787
788 /*
789 * Transfer entries whose hash bit 'bit' is set to nl; rehash
790 * the remaining entries
791 *
792 * NB: We could find entries via the hashtable instead. That
793 * would be O(hashents+numents) rather than O(numblks+numents),
794 * but this accesses memory more sequentially, and when we're
795 * called, the block is usually pretty full.
796 */
797 for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
798 struct zap_leaf_entry *le = ZAP_LEAF_ENTRY(l, i);
799 if (le->le_type != ZAP_CHUNK_ENTRY)
800 continue;
801
802 if (le->le_hash & (1ULL << bit))
803 zap_leaf_transfer_entry(l, i, nl);
804 else
805 (void) zap_leaf_rehash_entry(l, i);
806 }
807}
808
809void
810zap_leaf_stats(zap_t *zap, zap_leaf_t *l, zap_stats_t *zs)
811{
812 int i, n;
813
814 n = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
815 l->l_phys->l_hdr.lh_prefix_len;
816 n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
817 zs->zs_leafs_with_2n_pointers[n]++;
818
819
820 n = l->l_phys->l_hdr.lh_nentries/5;
821 n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
822 zs->zs_blocks_with_n5_entries[n]++;
823
824 n = ((1<<FZAP_BLOCK_SHIFT(zap)) -
825 l->l_phys->l_hdr.lh_nfree * (ZAP_LEAF_ARRAY_BYTES+1))*10 /
826 (1<<FZAP_BLOCK_SHIFT(zap));
827 n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
828 zs->zs_blocks_n_tenths_full[n]++;
829
830 for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(l); i++) {
831 int nentries = 0;
832 int chunk = l->l_phys->l_hash[i];
833
834 while (chunk != CHAIN_END) {
835 struct zap_leaf_entry *le =
836 ZAP_LEAF_ENTRY(l, chunk);
837
838 n = 1 + ZAP_LEAF_ARRAY_NCHUNKS(le->le_name_length) +
839 ZAP_LEAF_ARRAY_NCHUNKS(le->le_value_length *
840 le->le_int_size);
841 n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
842 zs->zs_entries_using_n_chunks[n]++;
843
844 chunk = le->le_next;
845 nentries++;
846 }
847
848 n = nentries;
849 n = MIN(n, ZAP_HISTOGRAM_SIZE-1);
850 zs->zs_buckets_with_n_entries[n]++;
851 }
852}