]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under the BSD-style license found in the | |
3 | // LICENSE file in the root directory of this source tree. An additional grant | |
4 | // of patent rights can be found in the PATENTS file in the same directory. | |
5 | // | |
6 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #include "table/block_based_filter_block.h" | |
11 | #include <algorithm> | |
12 | ||
13 | #include "db/dbformat.h" | |
14 | #include "monitoring/perf_context_imp.h" | |
15 | #include "rocksdb/filter_policy.h" | |
16 | #include "util/coding.h" | |
17 | #include "util/string_util.h" | |
18 | ||
19 | namespace rocksdb { | |
20 | ||
21 | namespace { | |
22 | ||
// Appends one "key: value" row to *props, hex-dump style: the value is
// wrapped every 64 characters, continuation lines are indented 16 spaces,
// and the key is right-aligned in a 14-character field when it fits.
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  const size_t kDataLength = 64;  // value characters per line
  const size_t kTabLength = 2;    // width of the ": " separator
  const size_t kOffLength = 16;   // indent for continuation lines

  // Wrap the value into kDataLength-character lines.
  std::string wrapped;
  wrapped.append(value, 0, std::min(kDataLength, value.size()));
  for (size_t pos = kDataLength; pos < value.size(); pos += kDataLength) {
    wrapped.push_back('\n');
    wrapped.append(kOffLength, ' ');
    wrapped.append(value, pos, std::min(kDataLength, value.size() - pos));
  }

  // Right-align short keys in a (kOffLength - kTabLength)-wide field.
  std::string header;
  const size_t field_width = kOffLength - kTabLength;
  if (key.size() < field_width) {
    header.assign(field_width - key.size(), ' ');
  }
  header += key;

  *props += header;
  *props += ": ";
  *props += wrapped;
  *props += '\n';
}
48 | ||
49 | template <class TKey> | |
50 | void AppendItem(std::string* props, const TKey& key, const std::string& value) { | |
51 | std::string key_str = rocksdb::ToString(key); | |
52 | AppendItem(props, key_str, value); | |
53 | } | |
54 | } // namespace | |
55 | ||
56 | ||
// See doc/table_format.txt for an explanation of the filter block format.

// Generate new filter every 2KB of data
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;  // 2048 bytes
62 | ||
// Builds the legacy block-based filter: keys/prefixes are accumulated in a
// flattened buffer (entries_/start_) and folded into result_ one filter per
// kFilterBase bytes of table data.
BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),  // may be null: no prefix filtering
      whole_key_filtering_(table_opt.whole_key_filtering),
      prev_prefix_start_(0),  // offset of the last added prefix in entries_
      prev_prefix_size_(0) {  // length of the last added prefix (0 = none yet)
  assert(policy_);  // a filter builder without a policy is a usage error
}
73 | ||
74 | void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) { | |
75 | uint64_t filter_index = (block_offset / kFilterBase); | |
76 | assert(filter_index >= filter_offsets_.size()); | |
77 | while (filter_index > filter_offsets_.size()) { | |
78 | GenerateFilter(); | |
79 | } | |
80 | } | |
81 | ||
82 | void BlockBasedFilterBlockBuilder::Add(const Slice& key) { | |
83 | if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { | |
84 | AddPrefix(key); | |
85 | } | |
86 | ||
87 | if (whole_key_filtering_) { | |
88 | AddKey(key); | |
89 | } | |
90 | } | |
91 | ||
92 | // Add key to filter if needed | |
93 | inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) { | |
94 | start_.push_back(entries_.size()); | |
95 | entries_.append(key.data(), key.size()); | |
96 | } | |
97 | ||
98 | // Add prefix to filter if needed | |
99 | inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) { | |
100 | // get slice for most recently added entry | |
101 | Slice prev; | |
102 | if (prev_prefix_size_ > 0) { | |
103 | prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_); | |
104 | } | |
105 | ||
106 | Slice prefix = prefix_extractor_->Transform(key); | |
107 | // insert prefix only when it's different from the previous prefix. | |
108 | if (prev.size() == 0 || prefix != prev) { | |
109 | start_.push_back(entries_.size()); | |
110 | prev_prefix_start_ = entries_.size(); | |
111 | prev_prefix_size_ = prefix.size(); | |
112 | entries_.append(prefix.data(), prefix.size()); | |
113 | } | |
114 | } | |
115 | ||
116 | Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& tmp, | |
117 | Status* status) { | |
118 | // In this impl we ignore BlockHandle | |
119 | *status = Status::OK(); | |
120 | if (!start_.empty()) { | |
121 | GenerateFilter(); | |
122 | } | |
123 | ||
124 | // Append array of per-filter offsets | |
125 | const uint32_t array_offset = static_cast<uint32_t>(result_.size()); | |
126 | for (size_t i = 0; i < filter_offsets_.size(); i++) { | |
127 | PutFixed32(&result_, filter_offsets_[i]); | |
128 | } | |
129 | ||
130 | PutFixed32(&result_, array_offset); | |
131 | result_.push_back(kFilterBaseLg); // Save encoding parameter in result | |
132 | return Slice(result_); | |
133 | } | |
134 | ||
135 | void BlockBasedFilterBlockBuilder::GenerateFilter() { | |
136 | const size_t num_entries = start_.size(); | |
137 | if (num_entries == 0) { | |
138 | // Fast path if there are no keys for this filter | |
139 | filter_offsets_.push_back(static_cast<uint32_t>(result_.size())); | |
140 | return; | |
141 | } | |
142 | ||
143 | // Make list of keys from flattened key structure | |
144 | start_.push_back(entries_.size()); // Simplify length computation | |
145 | tmp_entries_.resize(num_entries); | |
146 | for (size_t i = 0; i < num_entries; i++) { | |
147 | const char* base = entries_.data() + start_[i]; | |
148 | size_t length = start_[i + 1] - start_[i]; | |
149 | tmp_entries_[i] = Slice(base, length); | |
150 | } | |
151 | ||
152 | // Generate filter for current set of keys and append to result_. | |
153 | filter_offsets_.push_back(static_cast<uint32_t>(result_.size())); | |
154 | policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries), | |
155 | &result_); | |
156 | ||
157 | tmp_entries_.clear(); | |
158 | entries_.clear(); | |
159 | start_.clear(); | |
160 | prev_prefix_start_ = 0; | |
161 | prev_prefix_size_ = 0; | |
162 | } | |
163 | ||
// Parses a serialized filter block produced by BlockBasedFilterBlockBuilder.
// On any inconsistency the trailer parse bails out early, leaving
// data_ == nullptr / num_ == 0 so every lookup falls back to "may match".
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt, bool _whole_key_filtering,
    BlockContents&& contents, Statistics* stats)
    : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
      policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      data_(nullptr),
      offset_(nullptr),
      num_(0),
      base_lg_(0),
      contents_(std::move(contents)) {
  assert(policy_);
  size_t n = contents_.data.size();
  if (n < 5) return;  // 1 byte for base_lg_ and 4 for start of offset array
  base_lg_ = contents_.data[n - 1];
  // last_word is the offset where the per-filter offset array begins.
  uint32_t last_word = DecodeFixed32(contents_.data.data() + n - 5);
  if (last_word > n - 5) return;  // corrupt: array would start past the end
  data_ = contents_.data.data();
  offset_ = data_ + last_word;
  num_ = (n - 5 - last_word) / 4;  // count of 4-byte offset entries
}
186 | ||
187 | bool BlockBasedFilterBlockReader::KeyMayMatch( | |
188 | const Slice& key, uint64_t block_offset, const bool no_io, | |
189 | const Slice* const const_ikey_ptr) { | |
190 | assert(block_offset != kNotValid); | |
191 | if (!whole_key_filtering_) { | |
192 | return true; | |
193 | } | |
194 | return MayMatch(key, block_offset); | |
195 | } | |
196 | ||
197 | bool BlockBasedFilterBlockReader::PrefixMayMatch( | |
198 | const Slice& prefix, uint64_t block_offset, const bool no_io, | |
199 | const Slice* const const_ikey_ptr) { | |
200 | assert(block_offset != kNotValid); | |
201 | if (!prefix_extractor_) { | |
202 | return true; | |
203 | } | |
204 | return MayMatch(prefix, block_offset); | |
205 | } | |
206 | ||
207 | bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry, | |
208 | uint64_t block_offset) { | |
209 | uint64_t index = block_offset >> base_lg_; | |
210 | if (index < num_) { | |
211 | uint32_t start = DecodeFixed32(offset_ + index * 4); | |
212 | uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); | |
213 | if (start <= limit && limit <= (uint32_t)(offset_ - data_)) { | |
214 | Slice filter = Slice(data_ + start, limit - start); | |
215 | bool const may_match = policy_->KeyMayMatch(entry, filter); | |
216 | if (may_match) { | |
217 | PERF_COUNTER_ADD(bloom_sst_hit_count, 1); | |
218 | return true; | |
219 | } else { | |
220 | PERF_COUNTER_ADD(bloom_sst_miss_count, 1); | |
221 | return false; | |
222 | } | |
223 | } else if (start == limit) { | |
224 | // Empty filters do not match any entries | |
225 | return false; | |
226 | } | |
227 | } | |
228 | return true; // Errors are treated as potential matches | |
229 | } | |
230 | ||
231 | size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const { | |
232 | return num_ * 4 + 5 + (offset_ - data_); | |
233 | } | |
234 | ||
235 | std::string BlockBasedFilterBlockReader::ToString() const { | |
236 | std::string result, filter_meta; | |
237 | result.reserve(1024); | |
238 | ||
239 | std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks"); | |
240 | AppendItem(&result, s_fb, rocksdb::ToString(num_)); | |
241 | AppendItem(&result, s_bo, s_hd); | |
242 | ||
243 | for (size_t index = 0; index < num_; index++) { | |
244 | uint32_t start = DecodeFixed32(offset_ + index * 4); | |
245 | uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); | |
246 | ||
247 | if (start != limit) { | |
248 | result.append(" filter block # " + rocksdb::ToString(index + 1) + "\n"); | |
249 | Slice filter = Slice(data_ + start, limit - start); | |
250 | AppendItem(&result, start, filter.ToString(true)); | |
251 | } | |
252 | } | |
253 | return result; | |
254 | } | |
255 | } // namespace rocksdb |