]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/go/parquet/internal/hashing/xxh3_memo_table.gen.go
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / go / parquet / internal / hashing / xxh3_memo_table.gen.go
1 // Code generated by xxh3_memo_table.gen.go.tmpl. DO NOT EDIT.
2
3 // Licensed to the Apache Software Foundation (ASF) under one
4 // or more contributor license agreements. See the NOTICE file
5 // distributed with this work for additional information
6 // regarding copyright ownership. The ASF licenses this file
7 // to you under the Apache License, Version 2.0 (the
8 // "License"); you may not use this file except in compliance
9 // with the License. You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18
19 package hashing
20
21 import (
22 "math"
23
24 "github.com/apache/arrow/go/v6/arrow"
25 "github.com/apache/arrow/go/v6/arrow/bitutil"
26 "github.com/apache/arrow/go/v6/parquet/internal/utils"
27 )
28
29 type payloadInt32 struct {
30 val int32
31 memoIdx int32
32 }
33
34 type entryInt32 struct {
35 h uint64
36 payload payloadInt32
37 }
38
39 func (e entryInt32) Valid() bool { return e.h != sentinel }
40
41 // Int32HashTable is a hashtable specifically for int32 that
42 // is utilized with the MemoTable to generalize interactions for easier
43 // implementation of dictionaries without losing performance.
44 type Int32HashTable struct {
45 cap uint64
46 capMask uint64
47 size uint64
48
49 entries []entryInt32
50 }
51
52 // NewInt32HashTable returns a new hash table for int32 values
53 // initialized with the passed in capacity or 32 whichever is larger.
54 func NewInt32HashTable(cap uint64) *Int32HashTable {
55 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
56 ret := &Int32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
57 ret.entries = make([]entryInt32, initCap)
58 return ret
59 }
60
61 // Reset drops all of the values in this hash table and re-initializes it
62 // with the specified initial capacity as if by calling New, but without having
63 // to reallocate the object.
64 func (h *Int32HashTable) Reset(cap uint64) {
65 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
66 h.capMask = h.cap - 1
67 h.size = 0
68 h.entries = make([]entryInt32, h.cap)
69 }
70
71 // CopyValues is used for copying the values out of the hash table into the
72 // passed in slice, in the order that they were first inserted
73 func (h *Int32HashTable) CopyValues(out []int32) {
74 h.CopyValuesSubset(0, out)
75 }
76
77 // CopyValuesSubset copies a subset of the values in the hashtable out, starting
78 // with the value at start, in the order that they were inserted.
79 func (h *Int32HashTable) CopyValuesSubset(start int, out []int32) {
80 h.VisitEntries(func(e *entryInt32) {
81 idx := e.payload.memoIdx - int32(start)
82 if idx >= 0 {
83 out[idx] = e.payload.val
84 }
85 })
86 }
87
88 func (h *Int32HashTable) WriteOut(out []byte) {
89 h.WriteOutSubset(0, out)
90 }
91
92 func (h *Int32HashTable) WriteOutSubset(start int, out []byte) {
93 data := arrow.Int32Traits.CastFromBytes(out)
94 h.VisitEntries(func(e *entryInt32) {
95 idx := e.payload.memoIdx - int32(start)
96 if idx >= 0 {
97 data[idx] = utils.ToLEInt32(e.payload.val)
98 }
99 })
100 }
101
102 func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
103
104 func (Int32HashTable) fixHash(v uint64) uint64 {
105 if v == sentinel {
106 return 42
107 }
108 return v
109 }
110
111 // Lookup retrieves the entry for a given hash value assuming it's payload value returns
112 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
113 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
114 func (h *Int32HashTable) Lookup(v uint64, cmp func(int32) bool) (*entryInt32, bool) {
115 idx, ok := h.lookup(v, h.capMask, cmp)
116 return &h.entries[idx], ok
117 }
118
119 func (h *Int32HashTable) lookup(v uint64, szMask uint64, cmp func(int32) bool) (uint64, bool) {
120 const perturbShift uint8 = 5
121
122 var (
123 idx uint64
124 perturb uint64
125 e *entryInt32
126 )
127
128 v = h.fixHash(v)
129 idx = v & szMask
130 perturb = (v >> uint64(perturbShift)) + 1
131
132 for {
133 e = &h.entries[idx]
134 if e.h == v && cmp(e.payload.val) {
135 return idx, true
136 }
137
138 if e.h == sentinel {
139 return idx, false
140 }
141
142 // perturbation logic inspired from CPython's set/dict object
143 // the goal is that all 64 bits of unmasked hash value eventually
144 // participate int he probing sequence, to minimize clustering
145 idx = (idx + perturb) & szMask
146 perturb = (perturb >> uint64(perturbShift)) + 1
147 }
148 }
149
150 func (h *Int32HashTable) upsize(newcap uint64) error {
151 newMask := newcap - 1
152
153 oldEntries := h.entries
154 h.entries = make([]entryInt32, newcap)
155 for _, e := range oldEntries {
156 if e.Valid() {
157 idx, _ := h.lookup(e.h, newMask, func(int32) bool { return false })
158 h.entries[idx] = e
159 }
160 }
161 h.cap = newcap
162 h.capMask = newMask
163 return nil
164 }
165
166 // Insert updates the given entry with the provided hash value, payload value and memo index.
167 // The entry pointer must have been retrieved via lookup in order to actually insert properly.
168 func (h *Int32HashTable) Insert(e *entryInt32, v uint64, val int32, memoIdx int32) error {
169 e.h = h.fixHash(v)
170 e.payload.val = val
171 e.payload.memoIdx = memoIdx
172 h.size++
173
174 if h.needUpsize() {
175 h.upsize(h.cap * uint64(loadFactor) * 2)
176 }
177 return nil
178 }
179
180 // VisitEntries will call the passed in function on each *valid* entry in the hash table,
181 // a valid entry being one which has had a value inserted into it.
182 func (h *Int32HashTable) VisitEntries(visit func(*entryInt32)) {
183 for _, e := range h.entries {
184 if e.Valid() {
185 visit(&e)
186 }
187 }
188 }
189
190 // Int32MemoTable is a wrapper over the appropriate hashtable to provide an interface
191 // conforming to the MemoTable interface defined in the encoding package for general interactions
192 // regarding dictionaries.
193 type Int32MemoTable struct {
194 tbl *Int32HashTable
195 nullIdx int32
196 }
197
198 // NewInt32MemoTable returns a new memotable with num entries pre-allocated to reduce further
199 // allocations when inserting.
200 func NewInt32MemoTable(num int64) *Int32MemoTable {
201 return &Int32MemoTable{tbl: NewInt32HashTable(uint64(num)), nullIdx: KeyNotFound}
202 }
203
204 // Reset allows this table to be re-used by dumping all the data currently in the table.
205 func (s *Int32MemoTable) Reset() {
206 s.tbl.Reset(32)
207 s.nullIdx = KeyNotFound
208 }
209
210 // Size returns the current number of inserted elements into the table including if a null
211 // has been inserted.
212 func (s *Int32MemoTable) Size() int {
213 sz := int(s.tbl.size)
214 if _, ok := s.GetNull(); ok {
215 sz++
216 }
217 return sz
218 }
219
220 // GetNull returns the index of an inserted null or KeyNotFound along with a bool
221 // that will be true if found and false if not.
222 func (s *Int32MemoTable) GetNull() (int, bool) {
223 return int(s.nullIdx), s.nullIdx != KeyNotFound
224 }
225
226 // GetOrInsertNull will return the index of the null entry or insert a null entry
227 // if one currently doesn't exist. The found value will be true if there was already
228 // a null in the table, and false if it inserted one.
229 func (s *Int32MemoTable) GetOrInsertNull() (idx int, found bool) {
230 idx, found = s.GetNull()
231 if !found {
232 idx = s.Size()
233 s.nullIdx = int32(idx)
234 }
235 return
236 }
237
238 // CopyValues will copy the values from the memo table out into the passed in slice
239 // which must be of the appropriate type.
240 func (s *Int32MemoTable) CopyValues(out interface{}) {
241 s.CopyValuesSubset(0, out)
242 }
243
244 // CopyValuesSubset is like CopyValues but only copies a subset of values starting
245 // at the provided start index
246 func (s *Int32MemoTable) CopyValuesSubset(start int, out interface{}) {
247 s.tbl.CopyValuesSubset(start, out.([]int32))
248 }
249
250 func (s *Int32MemoTable) WriteOut(out []byte) {
251 s.tbl.WriteOut(out)
252 }
253
254 func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) {
255 s.tbl.WriteOutSubset(start, out)
256 }
257
258 // Get returns the index of the requested value in the hash table or KeyNotFound
259 // along with a boolean indicating if it was found or not.
260 func (s *Int32MemoTable) Get(val interface{}) (int, bool) {
261
262 h := hashInt(uint64(val.(int32)), 0)
263 if e, ok := s.tbl.Lookup(h, func(v int32) bool { return val.(int32) == v }); ok {
264 return int(e.payload.memoIdx), ok
265 }
266 return KeyNotFound, false
267 }
268
269 // GetOrInsert will return the index of the specified value in the table, or insert the
270 // value into the table and return the new index. found indicates whether or not it already
271 // existed in the table (true) or was inserted by this call (false).
272 func (s *Int32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
273
274 h := hashInt(uint64(val.(int32)), 0)
275 e, ok := s.tbl.Lookup(h, func(v int32) bool {
276 return val.(int32) == v
277 })
278
279 if ok {
280 idx = int(e.payload.memoIdx)
281 found = true
282 } else {
283 idx = s.Size()
284 s.tbl.Insert(e, h, val.(int32), int32(idx))
285 }
286 return
287 }
288
289 type payloadInt64 struct {
290 val int64
291 memoIdx int32
292 }
293
294 type entryInt64 struct {
295 h uint64
296 payload payloadInt64
297 }
298
299 func (e entryInt64) Valid() bool { return e.h != sentinel }
300
301 // Int64HashTable is a hashtable specifically for int64 that
302 // is utilized with the MemoTable to generalize interactions for easier
303 // implementation of dictionaries without losing performance.
304 type Int64HashTable struct {
305 cap uint64
306 capMask uint64
307 size uint64
308
309 entries []entryInt64
310 }
311
312 // NewInt64HashTable returns a new hash table for int64 values
313 // initialized with the passed in capacity or 32 whichever is larger.
314 func NewInt64HashTable(cap uint64) *Int64HashTable {
315 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
316 ret := &Int64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
317 ret.entries = make([]entryInt64, initCap)
318 return ret
319 }
320
321 // Reset drops all of the values in this hash table and re-initializes it
322 // with the specified initial capacity as if by calling New, but without having
323 // to reallocate the object.
324 func (h *Int64HashTable) Reset(cap uint64) {
325 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
326 h.capMask = h.cap - 1
327 h.size = 0
328 h.entries = make([]entryInt64, h.cap)
329 }
330
331 // CopyValues is used for copying the values out of the hash table into the
332 // passed in slice, in the order that they were first inserted
333 func (h *Int64HashTable) CopyValues(out []int64) {
334 h.CopyValuesSubset(0, out)
335 }
336
337 // CopyValuesSubset copies a subset of the values in the hashtable out, starting
338 // with the value at start, in the order that they were inserted.
339 func (h *Int64HashTable) CopyValuesSubset(start int, out []int64) {
340 h.VisitEntries(func(e *entryInt64) {
341 idx := e.payload.memoIdx - int32(start)
342 if idx >= 0 {
343 out[idx] = e.payload.val
344 }
345 })
346 }
347
348 func (h *Int64HashTable) WriteOut(out []byte) {
349 h.WriteOutSubset(0, out)
350 }
351
352 func (h *Int64HashTable) WriteOutSubset(start int, out []byte) {
353 data := arrow.Int64Traits.CastFromBytes(out)
354 h.VisitEntries(func(e *entryInt64) {
355 idx := e.payload.memoIdx - int32(start)
356 if idx >= 0 {
357 data[idx] = utils.ToLEInt64(e.payload.val)
358 }
359 })
360 }
361
362 func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
363
364 func (Int64HashTable) fixHash(v uint64) uint64 {
365 if v == sentinel {
366 return 42
367 }
368 return v
369 }
370
371 // Lookup retrieves the entry for a given hash value assuming it's payload value returns
372 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
373 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
374 func (h *Int64HashTable) Lookup(v uint64, cmp func(int64) bool) (*entryInt64, bool) {
375 idx, ok := h.lookup(v, h.capMask, cmp)
376 return &h.entries[idx], ok
377 }
378
379 func (h *Int64HashTable) lookup(v uint64, szMask uint64, cmp func(int64) bool) (uint64, bool) {
380 const perturbShift uint8 = 5
381
382 var (
383 idx uint64
384 perturb uint64
385 e *entryInt64
386 )
387
388 v = h.fixHash(v)
389 idx = v & szMask
390 perturb = (v >> uint64(perturbShift)) + 1
391
392 for {
393 e = &h.entries[idx]
394 if e.h == v && cmp(e.payload.val) {
395 return idx, true
396 }
397
398 if e.h == sentinel {
399 return idx, false
400 }
401
402 // perturbation logic inspired from CPython's set/dict object
403 // the goal is that all 64 bits of unmasked hash value eventually
404 // participate int he probing sequence, to minimize clustering
405 idx = (idx + perturb) & szMask
406 perturb = (perturb >> uint64(perturbShift)) + 1
407 }
408 }
409
410 func (h *Int64HashTable) upsize(newcap uint64) error {
411 newMask := newcap - 1
412
413 oldEntries := h.entries
414 h.entries = make([]entryInt64, newcap)
415 for _, e := range oldEntries {
416 if e.Valid() {
417 idx, _ := h.lookup(e.h, newMask, func(int64) bool { return false })
418 h.entries[idx] = e
419 }
420 }
421 h.cap = newcap
422 h.capMask = newMask
423 return nil
424 }
425
426 // Insert updates the given entry with the provided hash value, payload value and memo index.
427 // The entry pointer must have been retrieved via lookup in order to actually insert properly.
428 func (h *Int64HashTable) Insert(e *entryInt64, v uint64, val int64, memoIdx int32) error {
429 e.h = h.fixHash(v)
430 e.payload.val = val
431 e.payload.memoIdx = memoIdx
432 h.size++
433
434 if h.needUpsize() {
435 h.upsize(h.cap * uint64(loadFactor) * 2)
436 }
437 return nil
438 }
439
440 // VisitEntries will call the passed in function on each *valid* entry in the hash table,
441 // a valid entry being one which has had a value inserted into it.
442 func (h *Int64HashTable) VisitEntries(visit func(*entryInt64)) {
443 for _, e := range h.entries {
444 if e.Valid() {
445 visit(&e)
446 }
447 }
448 }
449
450 // Int64MemoTable is a wrapper over the appropriate hashtable to provide an interface
451 // conforming to the MemoTable interface defined in the encoding package for general interactions
452 // regarding dictionaries.
453 type Int64MemoTable struct {
454 tbl *Int64HashTable
455 nullIdx int32
456 }
457
458 // NewInt64MemoTable returns a new memotable with num entries pre-allocated to reduce further
459 // allocations when inserting.
460 func NewInt64MemoTable(num int64) *Int64MemoTable {
461 return &Int64MemoTable{tbl: NewInt64HashTable(uint64(num)), nullIdx: KeyNotFound}
462 }
463
464 // Reset allows this table to be re-used by dumping all the data currently in the table.
465 func (s *Int64MemoTable) Reset() {
466 s.tbl.Reset(32)
467 s.nullIdx = KeyNotFound
468 }
469
470 // Size returns the current number of inserted elements into the table including if a null
471 // has been inserted.
472 func (s *Int64MemoTable) Size() int {
473 sz := int(s.tbl.size)
474 if _, ok := s.GetNull(); ok {
475 sz++
476 }
477 return sz
478 }
479
480 // GetNull returns the index of an inserted null or KeyNotFound along with a bool
481 // that will be true if found and false if not.
482 func (s *Int64MemoTable) GetNull() (int, bool) {
483 return int(s.nullIdx), s.nullIdx != KeyNotFound
484 }
485
486 // GetOrInsertNull will return the index of the null entry or insert a null entry
487 // if one currently doesn't exist. The found value will be true if there was already
488 // a null in the table, and false if it inserted one.
489 func (s *Int64MemoTable) GetOrInsertNull() (idx int, found bool) {
490 idx, found = s.GetNull()
491 if !found {
492 idx = s.Size()
493 s.nullIdx = int32(idx)
494 }
495 return
496 }
497
498 // CopyValues will copy the values from the memo table out into the passed in slice
499 // which must be of the appropriate type.
500 func (s *Int64MemoTable) CopyValues(out interface{}) {
501 s.CopyValuesSubset(0, out)
502 }
503
504 // CopyValuesSubset is like CopyValues but only copies a subset of values starting
505 // at the provided start index
506 func (s *Int64MemoTable) CopyValuesSubset(start int, out interface{}) {
507 s.tbl.CopyValuesSubset(start, out.([]int64))
508 }
509
510 func (s *Int64MemoTable) WriteOut(out []byte) {
511 s.tbl.WriteOut(out)
512 }
513
514 func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) {
515 s.tbl.WriteOutSubset(start, out)
516 }
517
518 // Get returns the index of the requested value in the hash table or KeyNotFound
519 // along with a boolean indicating if it was found or not.
520 func (s *Int64MemoTable) Get(val interface{}) (int, bool) {
521
522 h := hashInt(uint64(val.(int64)), 0)
523 if e, ok := s.tbl.Lookup(h, func(v int64) bool { return val.(int64) == v }); ok {
524 return int(e.payload.memoIdx), ok
525 }
526 return KeyNotFound, false
527 }
528
529 // GetOrInsert will return the index of the specified value in the table, or insert the
530 // value into the table and return the new index. found indicates whether or not it already
531 // existed in the table (true) or was inserted by this call (false).
532 func (s *Int64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
533
534 h := hashInt(uint64(val.(int64)), 0)
535 e, ok := s.tbl.Lookup(h, func(v int64) bool {
536 return val.(int64) == v
537 })
538
539 if ok {
540 idx = int(e.payload.memoIdx)
541 found = true
542 } else {
543 idx = s.Size()
544 s.tbl.Insert(e, h, val.(int64), int32(idx))
545 }
546 return
547 }
548
549 type payloadFloat32 struct {
550 val float32
551 memoIdx int32
552 }
553
554 type entryFloat32 struct {
555 h uint64
556 payload payloadFloat32
557 }
558
559 func (e entryFloat32) Valid() bool { return e.h != sentinel }
560
561 // Float32HashTable is a hashtable specifically for float32 that
562 // is utilized with the MemoTable to generalize interactions for easier
563 // implementation of dictionaries without losing performance.
564 type Float32HashTable struct {
565 cap uint64
566 capMask uint64
567 size uint64
568
569 entries []entryFloat32
570 }
571
572 // NewFloat32HashTable returns a new hash table for float32 values
573 // initialized with the passed in capacity or 32 whichever is larger.
574 func NewFloat32HashTable(cap uint64) *Float32HashTable {
575 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
576 ret := &Float32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
577 ret.entries = make([]entryFloat32, initCap)
578 return ret
579 }
580
581 // Reset drops all of the values in this hash table and re-initializes it
582 // with the specified initial capacity as if by calling New, but without having
583 // to reallocate the object.
584 func (h *Float32HashTable) Reset(cap uint64) {
585 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
586 h.capMask = h.cap - 1
587 h.size = 0
588 h.entries = make([]entryFloat32, h.cap)
589 }
590
591 // CopyValues is used for copying the values out of the hash table into the
592 // passed in slice, in the order that they were first inserted
593 func (h *Float32HashTable) CopyValues(out []float32) {
594 h.CopyValuesSubset(0, out)
595 }
596
597 // CopyValuesSubset copies a subset of the values in the hashtable out, starting
598 // with the value at start, in the order that they were inserted.
599 func (h *Float32HashTable) CopyValuesSubset(start int, out []float32) {
600 h.VisitEntries(func(e *entryFloat32) {
601 idx := e.payload.memoIdx - int32(start)
602 if idx >= 0 {
603 out[idx] = e.payload.val
604 }
605 })
606 }
607
608 func (h *Float32HashTable) WriteOut(out []byte) {
609 h.WriteOutSubset(0, out)
610 }
611
612 func (h *Float32HashTable) WriteOutSubset(start int, out []byte) {
613 data := arrow.Float32Traits.CastFromBytes(out)
614 h.VisitEntries(func(e *entryFloat32) {
615 idx := e.payload.memoIdx - int32(start)
616 if idx >= 0 {
617 data[idx] = utils.ToLEFloat32(e.payload.val)
618 }
619 })
620 }
621
622 func (h *Float32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
623
624 func (Float32HashTable) fixHash(v uint64) uint64 {
625 if v == sentinel {
626 return 42
627 }
628 return v
629 }
630
631 // Lookup retrieves the entry for a given hash value assuming it's payload value returns
632 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
633 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
634 func (h *Float32HashTable) Lookup(v uint64, cmp func(float32) bool) (*entryFloat32, bool) {
635 idx, ok := h.lookup(v, h.capMask, cmp)
636 return &h.entries[idx], ok
637 }
638
639 func (h *Float32HashTable) lookup(v uint64, szMask uint64, cmp func(float32) bool) (uint64, bool) {
640 const perturbShift uint8 = 5
641
642 var (
643 idx uint64
644 perturb uint64
645 e *entryFloat32
646 )
647
648 v = h.fixHash(v)
649 idx = v & szMask
650 perturb = (v >> uint64(perturbShift)) + 1
651
652 for {
653 e = &h.entries[idx]
654 if e.h == v && cmp(e.payload.val) {
655 return idx, true
656 }
657
658 if e.h == sentinel {
659 return idx, false
660 }
661
662 // perturbation logic inspired from CPython's set/dict object
663 // the goal is that all 64 bits of unmasked hash value eventually
664 // participate int he probing sequence, to minimize clustering
665 idx = (idx + perturb) & szMask
666 perturb = (perturb >> uint64(perturbShift)) + 1
667 }
668 }
669
670 func (h *Float32HashTable) upsize(newcap uint64) error {
671 newMask := newcap - 1
672
673 oldEntries := h.entries
674 h.entries = make([]entryFloat32, newcap)
675 for _, e := range oldEntries {
676 if e.Valid() {
677 idx, _ := h.lookup(e.h, newMask, func(float32) bool { return false })
678 h.entries[idx] = e
679 }
680 }
681 h.cap = newcap
682 h.capMask = newMask
683 return nil
684 }
685
686 // Insert updates the given entry with the provided hash value, payload value and memo index.
687 // The entry pointer must have been retrieved via lookup in order to actually insert properly.
688 func (h *Float32HashTable) Insert(e *entryFloat32, v uint64, val float32, memoIdx int32) error {
689 e.h = h.fixHash(v)
690 e.payload.val = val
691 e.payload.memoIdx = memoIdx
692 h.size++
693
694 if h.needUpsize() {
695 h.upsize(h.cap * uint64(loadFactor) * 2)
696 }
697 return nil
698 }
699
700 // VisitEntries will call the passed in function on each *valid* entry in the hash table,
701 // a valid entry being one which has had a value inserted into it.
702 func (h *Float32HashTable) VisitEntries(visit func(*entryFloat32)) {
703 for _, e := range h.entries {
704 if e.Valid() {
705 visit(&e)
706 }
707 }
708 }
709
710 // Float32MemoTable is a wrapper over the appropriate hashtable to provide an interface
711 // conforming to the MemoTable interface defined in the encoding package for general interactions
712 // regarding dictionaries.
713 type Float32MemoTable struct {
714 tbl *Float32HashTable
715 nullIdx int32
716 }
717
718 // NewFloat32MemoTable returns a new memotable with num entries pre-allocated to reduce further
719 // allocations when inserting.
720 func NewFloat32MemoTable(num int64) *Float32MemoTable {
721 return &Float32MemoTable{tbl: NewFloat32HashTable(uint64(num)), nullIdx: KeyNotFound}
722 }
723
724 // Reset allows this table to be re-used by dumping all the data currently in the table.
725 func (s *Float32MemoTable) Reset() {
726 s.tbl.Reset(32)
727 s.nullIdx = KeyNotFound
728 }
729
730 // Size returns the current number of inserted elements into the table including if a null
731 // has been inserted.
732 func (s *Float32MemoTable) Size() int {
733 sz := int(s.tbl.size)
734 if _, ok := s.GetNull(); ok {
735 sz++
736 }
737 return sz
738 }
739
740 // GetNull returns the index of an inserted null or KeyNotFound along with a bool
741 // that will be true if found and false if not.
742 func (s *Float32MemoTable) GetNull() (int, bool) {
743 return int(s.nullIdx), s.nullIdx != KeyNotFound
744 }
745
746 // GetOrInsertNull will return the index of the null entry or insert a null entry
747 // if one currently doesn't exist. The found value will be true if there was already
748 // a null in the table, and false if it inserted one.
749 func (s *Float32MemoTable) GetOrInsertNull() (idx int, found bool) {
750 idx, found = s.GetNull()
751 if !found {
752 idx = s.Size()
753 s.nullIdx = int32(idx)
754 }
755 return
756 }
757
758 // CopyValues will copy the values from the memo table out into the passed in slice
759 // which must be of the appropriate type.
760 func (s *Float32MemoTable) CopyValues(out interface{}) {
761 s.CopyValuesSubset(0, out)
762 }
763
764 // CopyValuesSubset is like CopyValues but only copies a subset of values starting
765 // at the provided start index
766 func (s *Float32MemoTable) CopyValuesSubset(start int, out interface{}) {
767 s.tbl.CopyValuesSubset(start, out.([]float32))
768 }
769
770 func (s *Float32MemoTable) WriteOut(out []byte) {
771 s.tbl.WriteOut(out)
772 }
773
774 func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) {
775 s.tbl.WriteOutSubset(start, out)
776 }
777
778 // Get returns the index of the requested value in the hash table or KeyNotFound
779 // along with a boolean indicating if it was found or not.
780 func (s *Float32MemoTable) Get(val interface{}) (int, bool) {
781 var cmp func(float32) bool
782
783 if math.IsNaN(float64(val.(float32))) {
784 cmp = isNan32Cmp
785 // use consistent internal bit pattern for NaN regardless of the pattern
786 // that is passed to us. NaN is NaN is NaN
787 val = float32(math.NaN())
788 } else {
789 cmp = func(v float32) bool { return val.(float32) == v }
790 }
791
792 h := hashFloat32(val.(float32), 0)
793 if e, ok := s.tbl.Lookup(h, cmp); ok {
794 return int(e.payload.memoIdx), ok
795 }
796 return KeyNotFound, false
797 }
798
799 // GetOrInsert will return the index of the specified value in the table, or insert the
800 // value into the table and return the new index. found indicates whether or not it already
801 // existed in the table (true) or was inserted by this call (false).
802 func (s *Float32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
803
804 var cmp func(float32) bool
805
806 if math.IsNaN(float64(val.(float32))) {
807 cmp = isNan32Cmp
808 // use consistent internal bit pattern for NaN regardless of the pattern
809 // that is passed to us. NaN is NaN is NaN
810 val = float32(math.NaN())
811 } else {
812 cmp = func(v float32) bool { return val.(float32) == v }
813 }
814
815 h := hashFloat32(val.(float32), 0)
816 e, ok := s.tbl.Lookup(h, cmp)
817
818 if ok {
819 idx = int(e.payload.memoIdx)
820 found = true
821 } else {
822 idx = s.Size()
823 s.tbl.Insert(e, h, val.(float32), int32(idx))
824 }
825 return
826 }
827
828 type payloadFloat64 struct {
829 val float64
830 memoIdx int32
831 }
832
833 type entryFloat64 struct {
834 h uint64
835 payload payloadFloat64
836 }
837
838 func (e entryFloat64) Valid() bool { return e.h != sentinel }
839
840 // Float64HashTable is a hashtable specifically for float64 that
841 // is utilized with the MemoTable to generalize interactions for easier
842 // implementation of dictionaries without losing performance.
843 type Float64HashTable struct {
844 cap uint64
845 capMask uint64
846 size uint64
847
848 entries []entryFloat64
849 }
850
851 // NewFloat64HashTable returns a new hash table for float64 values
852 // initialized with the passed in capacity or 32 whichever is larger.
853 func NewFloat64HashTable(cap uint64) *Float64HashTable {
854 initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
855 ret := &Float64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
856 ret.entries = make([]entryFloat64, initCap)
857 return ret
858 }
859
860 // Reset drops all of the values in this hash table and re-initializes it
861 // with the specified initial capacity as if by calling New, but without having
862 // to reallocate the object.
863 func (h *Float64HashTable) Reset(cap uint64) {
864 h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
865 h.capMask = h.cap - 1
866 h.size = 0
867 h.entries = make([]entryFloat64, h.cap)
868 }
869
870 // CopyValues is used for copying the values out of the hash table into the
871 // passed in slice, in the order that they were first inserted
872 func (h *Float64HashTable) CopyValues(out []float64) {
873 h.CopyValuesSubset(0, out)
874 }
875
876 // CopyValuesSubset copies a subset of the values in the hashtable out, starting
877 // with the value at start, in the order that they were inserted.
878 func (h *Float64HashTable) CopyValuesSubset(start int, out []float64) {
879 h.VisitEntries(func(e *entryFloat64) {
880 idx := e.payload.memoIdx - int32(start)
881 if idx >= 0 {
882 out[idx] = e.payload.val
883 }
884 })
885 }
886
887 func (h *Float64HashTable) WriteOut(out []byte) {
888 h.WriteOutSubset(0, out)
889 }
890
891 func (h *Float64HashTable) WriteOutSubset(start int, out []byte) {
892 data := arrow.Float64Traits.CastFromBytes(out)
893 h.VisitEntries(func(e *entryFloat64) {
894 idx := e.payload.memoIdx - int32(start)
895 if idx >= 0 {
896 data[idx] = utils.ToLEFloat64(e.payload.val)
897 }
898 })
899 }
900
901 func (h *Float64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
902
903 func (Float64HashTable) fixHash(v uint64) uint64 {
904 if v == sentinel {
905 return 42
906 }
907 return v
908 }
909
910 // Lookup retrieves the entry for a given hash value assuming it's payload value returns
911 // true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
912 // and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
913 func (h *Float64HashTable) Lookup(v uint64, cmp func(float64) bool) (*entryFloat64, bool) {
914 idx, ok := h.lookup(v, h.capMask, cmp)
915 return &h.entries[idx], ok
916 }
917
918 func (h *Float64HashTable) lookup(v uint64, szMask uint64, cmp func(float64) bool) (uint64, bool) {
919 const perturbShift uint8 = 5
920
921 var (
922 idx uint64
923 perturb uint64
924 e *entryFloat64
925 )
926
927 v = h.fixHash(v)
928 idx = v & szMask
929 perturb = (v >> uint64(perturbShift)) + 1
930
931 for {
932 e = &h.entries[idx]
933 if e.h == v && cmp(e.payload.val) {
934 return idx, true
935 }
936
937 if e.h == sentinel {
938 return idx, false
939 }
940
941 // perturbation logic inspired from CPython's set/dict object
942 // the goal is that all 64 bits of unmasked hash value eventually
943 // participate int he probing sequence, to minimize clustering
944 idx = (idx + perturb) & szMask
945 perturb = (perturb >> uint64(perturbShift)) + 1
946 }
947 }
948
949 func (h *Float64HashTable) upsize(newcap uint64) error {
950 newMask := newcap - 1
951
952 oldEntries := h.entries
953 h.entries = make([]entryFloat64, newcap)
954 for _, e := range oldEntries {
955 if e.Valid() {
956 idx, _ := h.lookup(e.h, newMask, func(float64) bool { return false })
957 h.entries[idx] = e
958 }
959 }
960 h.cap = newcap
961 h.capMask = newMask
962 return nil
963 }
964
965 // Insert updates the given entry with the provided hash value, payload value and memo index.
966 // The entry pointer must have been retrieved via lookup in order to actually insert properly.
967 func (h *Float64HashTable) Insert(e *entryFloat64, v uint64, val float64, memoIdx int32) error {
968 e.h = h.fixHash(v)
969 e.payload.val = val
970 e.payload.memoIdx = memoIdx
971 h.size++
972
973 if h.needUpsize() {
974 h.upsize(h.cap * uint64(loadFactor) * 2)
975 }
976 return nil
977 }
978
979 // VisitEntries will call the passed in function on each *valid* entry in the hash table,
980 // a valid entry being one which has had a value inserted into it.
981 func (h *Float64HashTable) VisitEntries(visit func(*entryFloat64)) {
982 for _, e := range h.entries {
983 if e.Valid() {
984 visit(&e)
985 }
986 }
987 }
988
989 // Float64MemoTable is a wrapper over the appropriate hashtable to provide an interface
990 // conforming to the MemoTable interface defined in the encoding package for general interactions
991 // regarding dictionaries.
992 type Float64MemoTable struct {
993 tbl *Float64HashTable
994 nullIdx int32
995 }
996
997 // NewFloat64MemoTable returns a new memotable with num entries pre-allocated to reduce further
998 // allocations when inserting.
999 func NewFloat64MemoTable(num int64) *Float64MemoTable {
1000 return &Float64MemoTable{tbl: NewFloat64HashTable(uint64(num)), nullIdx: KeyNotFound}
1001 }
1002
1003 // Reset allows this table to be re-used by dumping all the data currently in the table.
1004 func (s *Float64MemoTable) Reset() {
1005 s.tbl.Reset(32)
1006 s.nullIdx = KeyNotFound
1007 }
1008
1009 // Size returns the current number of inserted elements into the table including if a null
1010 // has been inserted.
1011 func (s *Float64MemoTable) Size() int {
1012 sz := int(s.tbl.size)
1013 if _, ok := s.GetNull(); ok {
1014 sz++
1015 }
1016 return sz
1017 }
1018
1019 // GetNull returns the index of an inserted null or KeyNotFound along with a bool
1020 // that will be true if found and false if not.
1021 func (s *Float64MemoTable) GetNull() (int, bool) {
1022 return int(s.nullIdx), s.nullIdx != KeyNotFound
1023 }
1024
1025 // GetOrInsertNull will return the index of the null entry or insert a null entry
1026 // if one currently doesn't exist. The found value will be true if there was already
1027 // a null in the table, and false if it inserted one.
1028 func (s *Float64MemoTable) GetOrInsertNull() (idx int, found bool) {
1029 idx, found = s.GetNull()
1030 if !found {
1031 idx = s.Size()
1032 s.nullIdx = int32(idx)
1033 }
1034 return
1035 }
1036
1037 // CopyValues will copy the values from the memo table out into the passed in slice
1038 // which must be of the appropriate type.
1039 func (s *Float64MemoTable) CopyValues(out interface{}) {
1040 s.CopyValuesSubset(0, out)
1041 }
1042
1043 // CopyValuesSubset is like CopyValues but only copies a subset of values starting
1044 // at the provided start index
1045 func (s *Float64MemoTable) CopyValuesSubset(start int, out interface{}) {
1046 s.tbl.CopyValuesSubset(start, out.([]float64))
1047 }
1048
1049 func (s *Float64MemoTable) WriteOut(out []byte) {
1050 s.tbl.WriteOut(out)
1051 }
1052
1053 func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) {
1054 s.tbl.WriteOutSubset(start, out)
1055 }
1056
1057 // Get returns the index of the requested value in the hash table or KeyNotFound
1058 // along with a boolean indicating if it was found or not.
1059 func (s *Float64MemoTable) Get(val interface{}) (int, bool) {
1060 var cmp func(float64) bool
1061 if math.IsNaN(val.(float64)) {
1062 cmp = math.IsNaN
1063 // use consistent internal bit pattern for NaN regardless of the pattern
1064 // that is passed to us. NaN is NaN is NaN
1065 val = math.NaN()
1066 } else {
1067 cmp = func(v float64) bool { return val.(float64) == v }
1068 }
1069
1070 h := hashFloat64(val.(float64), 0)
1071 if e, ok := s.tbl.Lookup(h, cmp); ok {
1072 return int(e.payload.memoIdx), ok
1073 }
1074 return KeyNotFound, false
1075 }
1076
1077 // GetOrInsert will return the index of the specified value in the table, or insert the
1078 // value into the table and return the new index. found indicates whether or not it already
1079 // existed in the table (true) or was inserted by this call (false).
1080 func (s *Float64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
1081
1082 var cmp func(float64) bool
1083 if math.IsNaN(val.(float64)) {
1084 cmp = math.IsNaN
1085 // use consistent internal bit pattern for NaN regardless of the pattern
1086 // that is passed to us. NaN is NaN is NaN
1087 val = math.NaN()
1088 } else {
1089 cmp = func(v float64) bool { return val.(float64) == v }
1090 }
1091
1092 h := hashFloat64(val.(float64), 0)
1093 e, ok := s.tbl.Lookup(h, cmp)
1094
1095 if ok {
1096 idx = int(e.payload.memoIdx)
1097 found = true
1098 } else {
1099 idx = s.Size()
1100 s.tbl.Insert(e, h, val.(float64), int32(idx))
1101 }
1102 return
1103 }