1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
14 #include "table/plain_table_index.h"
15 #include "util/coding.h"
16 #include "util/hash.h"
21 inline uint32_t GetBucketIdFromHash(uint32_t hash
, uint32_t num_buckets
) {
22 assert(num_buckets
> 0);
23 return hash
% num_buckets
;
27 Status
PlainTableIndex::InitFromRawData(Slice data
) {
28 if (!GetVarint32(&data
, &index_size_
)) {
29 return Status::Corruption("Couldn't read the index size!");
31 assert(index_size_
> 0);
32 if (!GetVarint32(&data
, &num_prefixes_
)) {
33 return Status::Corruption("Couldn't read the index size!");
36 static_cast<uint32_t>(data
.size()) - index_size_
* kOffsetLen
;
38 char* index_data_begin
= const_cast<char*>(data
.data());
39 index_
= reinterpret_cast<uint32_t*>(index_data_begin
);
40 sub_index_
= reinterpret_cast<char*>(index_
+ index_size_
);
44 PlainTableIndex::IndexSearchResult
PlainTableIndex::GetOffset(
45 uint32_t prefix_hash
, uint32_t* bucket_value
) const {
46 int bucket
= GetBucketIdFromHash(prefix_hash
, index_size_
);
47 GetUnaligned(index_
+ bucket
, bucket_value
);
48 if ((*bucket_value
& kSubIndexMask
) == kSubIndexMask
) {
49 *bucket_value
^= kSubIndexMask
;
52 if (*bucket_value
>= kMaxFileSize
) {
53 return kNoPrefixForBucket
;
55 // point directly to the file
60 void PlainTableIndexBuilder::IndexRecordList::AddRecord(uint32_t hash
,
62 if (num_records_in_current_group_
== kNumRecordsPerGroup
) {
63 current_group_
= AllocateNewGroup();
64 num_records_in_current_group_
= 0;
66 auto& new_record
= current_group_
[num_records_in_current_group_
++];
67 new_record
.hash
= hash
;
68 new_record
.offset
= offset
;
69 new_record
.next
= nullptr;
72 void PlainTableIndexBuilder::AddKeyPrefix(Slice key_prefix_slice
,
73 uint32_t key_offset
) {
74 if (is_first_record_
|| prev_key_prefix_
!= key_prefix_slice
.ToString()) {
76 if (!is_first_record_
) {
77 keys_per_prefix_hist_
.Add(num_keys_per_prefix_
);
79 num_keys_per_prefix_
= 0;
80 prev_key_prefix_
= key_prefix_slice
.ToString();
81 prev_key_prefix_hash_
= GetSliceHash(key_prefix_slice
);
86 // Add an index key for every kIndexIntervalForSamePrefixKeys keys
87 record_list_
.AddRecord(prev_key_prefix_hash_
, key_offset
);
91 num_keys_per_prefix_
++;
92 if (index_sparseness_
== 0 || num_keys_per_prefix_
% index_sparseness_
== 0) {
95 is_first_record_
= false;
98 Slice
PlainTableIndexBuilder::Finish() {
100 std::vector
<IndexRecord
*> hash_to_offsets(index_size_
, nullptr);
101 std::vector
<uint32_t> entries_per_bucket(index_size_
, 0);
102 BucketizeIndexes(&hash_to_offsets
, &entries_per_bucket
);
104 keys_per_prefix_hist_
.Add(num_keys_per_prefix_
);
105 ROCKS_LOG_INFO(ioptions_
.info_log
, "Number of Keys per prefix Histogram: %s",
106 keys_per_prefix_hist_
.ToString().c_str());
108 // From the temp data structure, populate indexes.
109 return FillIndexes(hash_to_offsets
, entries_per_bucket
);
112 void PlainTableIndexBuilder::AllocateIndex() {
113 if (prefix_extractor_
== nullptr || hash_table_ratio_
<= 0) {
114 // Fall back to pure binary search if the user fails to specify a prefix
118 double hash_table_size_multipier
= 1.0 / hash_table_ratio_
;
120 static_cast<uint32_t>(num_prefixes_
* hash_table_size_multipier
) + 1;
121 assert(index_size_
> 0);
125 void PlainTableIndexBuilder::BucketizeIndexes(
126 std::vector
<IndexRecord
*>* hash_to_offsets
,
127 std::vector
<uint32_t>* entries_per_bucket
) {
129 uint32_t prev_hash
= 0;
130 size_t num_records
= record_list_
.GetNumRecords();
131 for (size_t i
= 0; i
< num_records
; i
++) {
132 IndexRecord
* index_record
= record_list_
.At(i
);
133 uint32_t cur_hash
= index_record
->hash
;
134 if (first
|| prev_hash
!= cur_hash
) {
135 prev_hash
= cur_hash
;
138 uint32_t bucket
= GetBucketIdFromHash(cur_hash
, index_size_
);
139 IndexRecord
* prev_bucket_head
= (*hash_to_offsets
)[bucket
];
140 index_record
->next
= prev_bucket_head
;
141 (*hash_to_offsets
)[bucket
] = index_record
;
142 (*entries_per_bucket
)[bucket
]++;
146 for (auto entry_count
: *entries_per_bucket
) {
147 if (entry_count
<= 1) {
150 // Only buckets with more than 1 entry will have subindex.
151 sub_index_size_
+= VarintLength(entry_count
);
152 // total bytes needed to store these entries' in-file offsets.
153 sub_index_size_
+= entry_count
* PlainTableIndex::kOffsetLen
;
157 Slice
PlainTableIndexBuilder::FillIndexes(
158 const std::vector
<IndexRecord
*>& hash_to_offsets
,
159 const std::vector
<uint32_t>& entries_per_bucket
) {
160 ROCKS_LOG_DEBUG(ioptions_
.info_log
,
161 "Reserving %" PRIu32
" bytes for plain table's sub_index",
163 auto total_allocate_size
= GetTotalSize();
164 char* allocated
= arena_
->AllocateAligned(
165 total_allocate_size
, huge_page_tlb_size_
, ioptions_
.info_log
);
167 auto temp_ptr
= EncodeVarint32(allocated
, index_size_
);
169 reinterpret_cast<uint32_t*>(EncodeVarint32(temp_ptr
, num_prefixes_
));
170 char* sub_index
= reinterpret_cast<char*>(index
+ index_size_
);
172 uint32_t sub_index_offset
= 0;
173 for (uint32_t i
= 0; i
< index_size_
; i
++) {
174 uint32_t num_keys_for_bucket
= entries_per_bucket
[i
];
175 switch (num_keys_for_bucket
) {
178 PutUnaligned(index
+ i
, (uint32_t)PlainTableIndex::kMaxFileSize
);
181 // point directly to the file offset
182 PutUnaligned(index
+ i
, hash_to_offsets
[i
]->offset
);
185 // point to second level indexes.
186 PutUnaligned(index
+ i
, sub_index_offset
| PlainTableIndex::kSubIndexMask
);
187 char* prev_ptr
= &sub_index
[sub_index_offset
];
188 char* cur_ptr
= EncodeVarint32(prev_ptr
, num_keys_for_bucket
);
189 sub_index_offset
+= static_cast<uint32_t>(cur_ptr
- prev_ptr
);
190 char* sub_index_pos
= &sub_index
[sub_index_offset
];
191 IndexRecord
* record
= hash_to_offsets
[i
];
193 for (j
= num_keys_for_bucket
- 1; j
>= 0 && record
;
194 j
--, record
= record
->next
) {
195 EncodeFixed32(sub_index_pos
+ j
* sizeof(uint32_t), record
->offset
);
197 assert(j
== -1 && record
== nullptr);
198 sub_index_offset
+= PlainTableIndex::kOffsetLen
* num_keys_for_bucket
;
199 assert(sub_index_offset
<= sub_index_size_
);
203 assert(sub_index_offset
== sub_index_size_
);
205 ROCKS_LOG_DEBUG(ioptions_
.info_log
,
206 "hash table size: %d, suffix_map length %" ROCKSDB_PRIszt
,
207 index_size_
, sub_index_size_
);
208 return Slice(allocated
, GetTotalSize());
211 const std::string
PlainTableIndexBuilder::kPlainTableIndexBlock
=
212 "PlainTableIndexBlock";
213 }; // namespace rocksdb
215 #endif // ROCKSDB_LITE