1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
8 #include "rocksdb/slice.h"
9 #include "table/data_block_hash_index.h"
10 #include "util/coding.h"
11 #include "util/hash.h"
15 void DataBlockHashIndexBuilder::Add(const Slice
& key
,
16 const size_t restart_index
) {
18 if (restart_index
> kMaxRestartSupportedByHashIndex
) {
23 uint32_t hash_value
= GetSliceHash(key
);
24 hash_and_restart_pairs_
.emplace_back(hash_value
,
25 static_cast<uint8_t>(restart_index
));
26 estimated_num_buckets_
+= bucket_per_key_
;
29 void DataBlockHashIndexBuilder::Finish(std::string
& buffer
) {
31 uint16_t num_buckets
= static_cast<uint16_t>(estimated_num_buckets_
);
33 if (num_buckets
== 0) {
34 num_buckets
= 1; // sanity check
37 // The build-in hash cannot well distribute strings when into different
38 // buckets when num_buckets is power of two, resulting in high hash
40 // We made the num_buckets to be odd to avoid this issue.
43 std::vector
<uint8_t> buckets(num_buckets
, kNoEntry
);
44 // write the restart_index array
45 for (auto& entry
: hash_and_restart_pairs_
) {
46 uint32_t hash_value
= entry
.first
;
47 uint8_t restart_index
= entry
.second
;
48 uint16_t buck_idx
= static_cast<uint16_t>(hash_value
% num_buckets
);
49 if (buckets
[buck_idx
] == kNoEntry
) {
50 buckets
[buck_idx
] = restart_index
;
51 } else if (buckets
[buck_idx
] != restart_index
) {
52 // same bucket cannot store two different restart_index, mark collision
53 buckets
[buck_idx
] = kCollision
;
57 for (uint8_t restart_index
: buckets
) {
59 const_cast<const char*>(reinterpret_cast<char*>(&restart_index
)),
60 sizeof(restart_index
));
64 PutFixed16(&buffer
, num_buckets
);
66 assert(buffer
.size() <= kMaxBlockSizeSupportedByHashIndex
);
69 void DataBlockHashIndexBuilder::Reset() {
70 estimated_num_buckets_
= 0;
72 hash_and_restart_pairs_
.clear();
75 void DataBlockHashIndex::Initialize(const char* data
, uint16_t size
,
76 uint16_t* map_offset
) {
77 assert(size
>= sizeof(uint16_t)); // NUM_BUCKETS
78 num_buckets_
= DecodeFixed16(data
+ size
- sizeof(uint16_t));
79 assert(num_buckets_
> 0);
80 assert(size
> num_buckets_
* sizeof(uint8_t));
81 *map_offset
= static_cast<uint16_t>(size
- sizeof(uint16_t) -
82 num_buckets_
* sizeof(uint8_t));
85 uint8_t DataBlockHashIndex::Lookup(const char* data
, uint32_t map_offset
,
86 const Slice
& key
) const {
87 uint32_t hash_value
= GetSliceHash(key
);
88 uint16_t idx
= static_cast<uint16_t>(hash_value
% num_buckets_
);
89 const char* bucket_table
= data
+ map_offset
;
90 return static_cast<uint8_t>(*(bucket_table
+ idx
* sizeof(uint8_t)));
93 } // namespace rocksdb