]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | ||
6 | #pragma once | |
7 | ||
1e59de90 | 8 | #include <cstdint> |
11fdf7f2 TL |
9 | #include <string> |
10 | #include <vector> | |
11 | ||
12 | #include "rocksdb/slice.h" | |
13 | ||
f67539c2 | 14 | namespace ROCKSDB_NAMESPACE { |
11fdf7f2 TL |
15 | // This is an experimental feature aiming to reduce the CPU utilization of |
16 | // point-lookup within a data-block. It is only used in data blocks, and not | |
17 | // in meta-data blocks or per-table index blocks. | |
18 | // | |
19 | // It only used to support BlockBasedTable::Get(). | |
20 | // | |
21 | // A serialized hash index is appended to the data-block. The new block data | |
22 | // format is as follows: | |
23 | // | |
24 | // DATA_BLOCK: [RI RI RI ... RI RI_IDX HASH_IDX FOOTER] | |
25 | // | |
26 | // RI: Restart Interval (the same as the default data-block format) | |
27 | // RI_IDX: Restart Interval index (the same as the default data-block format) | |
28 | // HASH_IDX: The new data-block hash index feature. | |
29 | // FOOTER: A 32bit block footer, which is the NUM_RESTARTS with the MSB as | |
30 | // the flag indicating if this hash index is in use. Note that | |
31 | // given a data block < 32KB, the MSB is never used. So we can | |
32 | // borrow the MSB as the hash index flag. Therefore, this format is | |
33 | // compatible with the legacy data-blocks with num_restarts < 32768, | |
34 | // as the MSB is 0. | |
35 | // | |
36 | // The format of the data-block hash index is as follows: | |
37 | // | |
38 | // HASH_IDX: [B B B ... B NUM_BUCK] | |
39 | // | |
40 | // B: bucket, an array of restart index. Each buckets is uint8_t. | |
41 | // NUM_BUCK: Number of buckets, which is the length of the bucket array. | |
42 | // | |
43 | // We reserve two special flag: | |
44 | // kNoEntry=255, | |
45 | // kCollision=254. | |
46 | // | |
47 | // Therefore, the max number of restarts this hash index can supoport is 253. | |
48 | // | |
49 | // Buckets are initialized to be kNoEntry. | |
50 | // | |
51 | // When storing a key in the hash index, the key is first hashed to a bucket. | |
52 | // If there the bucket is empty (kNoEntry), the restart index is stored in | |
53 | // the bucket. If there is already a restart index there, we will update the | |
54 | // existing restart index to a collision marker (kCollision). If the | |
55 | // the bucket is already marked as collision, we do not store the restart | |
56 | // index either. | |
57 | // | |
58 | // During query process, a key is first hashed to a bucket. Then we examine if | |
59 | // the buckets store nothing (kNoEntry) or the bucket had a collision | |
60 | // (kCollision). If either of those happens, we get the restart index of | |
61 | // the key and will directly go to the restart interval to search the key. | |
62 | // | |
63 | // Note that we only support blocks with #restart_interval < 254. If a block | |
64 | // has more restart interval than that, hash index will not be create for it. | |
65 | ||
66 | const uint8_t kNoEntry = 255; | |
67 | const uint8_t kCollision = 254; | |
68 | const uint8_t kMaxRestartSupportedByHashIndex = 253; | |
69 | ||
70 | // Because we use uint16_t address, we only support block no more than 64KB | |
71 | const size_t kMaxBlockSizeSupportedByHashIndex = 1u << 16; | |
72 | const double kDefaultUtilRatio = 0.75; | |
73 | ||
74 | class DataBlockHashIndexBuilder { | |
75 | public: | |
76 | DataBlockHashIndexBuilder() | |
77 | : bucket_per_key_(-1 /*uninitialized marker*/), | |
78 | estimated_num_buckets_(0), | |
79 | valid_(false) {} | |
80 | ||
81 | void Initialize(double util_ratio) { | |
82 | if (util_ratio <= 0) { | |
83 | util_ratio = kDefaultUtilRatio; // sanity check | |
84 | } | |
85 | bucket_per_key_ = 1 / util_ratio; | |
86 | valid_ = true; | |
87 | } | |
88 | ||
89 | inline bool Valid() const { return valid_ && bucket_per_key_ > 0; } | |
90 | void Add(const Slice& key, const size_t restart_index); | |
91 | void Finish(std::string& buffer); | |
92 | void Reset(); | |
93 | inline size_t EstimateSize() const { | |
94 | uint16_t estimated_num_buckets = | |
95 | static_cast<uint16_t>(estimated_num_buckets_); | |
96 | ||
97 | // Maching the num_buckets number in DataBlockHashIndexBuilder::Finish. | |
98 | estimated_num_buckets |= 1; | |
99 | ||
100 | return sizeof(uint16_t) + | |
101 | static_cast<size_t>(estimated_num_buckets * sizeof(uint8_t)); | |
102 | } | |
103 | ||
104 | private: | |
105 | double bucket_per_key_; // is the multiplicative inverse of util_ratio_ | |
106 | double estimated_num_buckets_; | |
107 | ||
108 | // Now the only usage for `valid_` is to mark false when the inserted | |
109 | // restart_index is larger than supported. In this case HashIndex is not | |
110 | // appended to the block content. | |
111 | bool valid_; | |
112 | ||
113 | std::vector<std::pair<uint32_t, uint8_t>> hash_and_restart_pairs_; | |
114 | friend class DataBlockHashIndex_DataBlockHashTestSmall_Test; | |
115 | }; | |
116 | ||
117 | class DataBlockHashIndex { | |
118 | public: | |
119 | DataBlockHashIndex() : num_buckets_(0) {} | |
120 | ||
121 | void Initialize(const char* data, uint16_t size, uint16_t* map_offset); | |
122 | ||
123 | uint8_t Lookup(const char* data, uint32_t map_offset, const Slice& key) const; | |
124 | ||
125 | inline bool Valid() { return num_buckets_ != 0; } | |
126 | ||
127 | private: | |
128 | // To make the serialized hash index compact and to save the space overhead, | |
129 | // here all the data fields persisted in the block are in uint16 format. | |
130 | // We find that a uint16 is large enough to index every offset of a 64KiB | |
131 | // block. | |
132 | // So in other words, DataBlockHashIndex does not support block size equal | |
133 | // or greater then 64KiB. | |
134 | uint16_t num_buckets_; | |
135 | }; | |
136 | ||
f67539c2 | 137 | } // namespace ROCKSDB_NAMESPACE |