]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
f67539c2 | 10 | #include "table/block_based/block_based_filter_block.h" |
7c673cae FG |
11 | #include <algorithm> |
12 | ||
13 | #include "db/dbformat.h" | |
14 | #include "monitoring/perf_context_imp.h" | |
15 | #include "rocksdb/filter_policy.h" | |
f67539c2 | 16 | #include "table/block_based/block_based_table_reader.h" |
7c673cae FG |
17 | #include "util/coding.h" |
18 | #include "util/string_util.h" | |
19 | ||
f67539c2 | 20 | namespace ROCKSDB_NAMESPACE { |
7c673cae FG |
21 | |
22 | namespace { | |
23 | ||
// Appends one "key: value" row to *props. The key is right-aligned in a
// 14-character column; the value is wrapped every 64 characters, with each
// continuation line indented by 16 spaces.
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  const size_t kDataLength = 64;  // payload characters per line
  const size_t kTabLength = 2;
  const size_t kOffLength = 16;   // continuation-line indent

  // Wrap the value into kDataLength-sized chunks.
  std::string wrapped = value.substr(0, kDataLength);
  for (size_t pos = kDataLength; pos < value.size(); pos += kDataLength) {
    wrapped += '\n';
    wrapped.append(kOffLength, ' ');
    wrapped += value.substr(pos, kDataLength);
  }

  // Right-align the key within its column (long keys are left untouched).
  const size_t key_column = kOffLength - kTabLength;
  std::string padded_key;
  if (key.size() < key_column) {
    padded_key.assign(key_column - key.size(), ' ');
  }
  padded_key += key;

  props->append(padded_key + ": " + wrapped + "\n");
}
49 | ||
50 | template <class TKey> | |
51 | void AppendItem(std::string* props, const TKey& key, const std::string& value) { | |
f67539c2 | 52 | std::string key_str = ROCKSDB_NAMESPACE::ToString(key); |
7c673cae FG |
53 | AppendItem(props, key_str, value); |
54 | } | |
55 | } // namespace | |
56 | ||
7c673cae FG |
// See doc/table_format.txt for an explanation of the filter block format.

// Generate new filter every 2KB of data. kFilterBaseLg is also persisted as
// the trailing byte of the finished filter block (see Finish()) so readers
// can recover the block-offset -> filter-index mapping.
// constexpr guarantees compile-time values (the file already relies on
// C++11 features such as nullptr and std::unique_ptr).
static constexpr size_t kFilterBaseLg = 11;
static constexpr size_t kFilterBase = 1 << kFilterBaseLg;
62 | ||
// Builds legacy block-based (one filter per 2KB of data) filter blocks.
// `prefix_extractor` may be nullptr, in which case no prefixes are added
// (see Add()). The filter policy comes from `table_opt`, must be non-null,
// and is not owned by this builder.
BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      prev_prefix_start_(0),
      prev_prefix_size_(0),
      num_added_(0) {
  assert(policy_);
}
74 | ||
75 | void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) { | |
76 | uint64_t filter_index = (block_offset / kFilterBase); | |
77 | assert(filter_index >= filter_offsets_.size()); | |
78 | while (filter_index > filter_offsets_.size()) { | |
79 | GenerateFilter(); | |
80 | } | |
81 | } | |
82 | ||
83 | void BlockBasedFilterBlockBuilder::Add(const Slice& key) { | |
84 | if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { | |
85 | AddPrefix(key); | |
86 | } | |
87 | ||
88 | if (whole_key_filtering_) { | |
89 | AddKey(key); | |
90 | } | |
91 | } | |
92 | ||
93 | // Add key to filter if needed | |
94 | inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) { | |
11fdf7f2 | 95 | num_added_++; |
7c673cae FG |
96 | start_.push_back(entries_.size()); |
97 | entries_.append(key.data(), key.size()); | |
98 | } | |
99 | ||
100 | // Add prefix to filter if needed | |
101 | inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) { | |
102 | // get slice for most recently added entry | |
103 | Slice prev; | |
104 | if (prev_prefix_size_ > 0) { | |
105 | prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_); | |
106 | } | |
107 | ||
108 | Slice prefix = prefix_extractor_->Transform(key); | |
109 | // insert prefix only when it's different from the previous prefix. | |
110 | if (prev.size() == 0 || prefix != prev) { | |
7c673cae FG |
111 | prev_prefix_start_ = entries_.size(); |
112 | prev_prefix_size_ = prefix.size(); | |
11fdf7f2 | 113 | AddKey(prefix); |
7c673cae FG |
114 | } |
115 | } | |
116 | ||
11fdf7f2 | 117 | Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, |
7c673cae FG |
118 | Status* status) { |
119 | // In this impl we ignore BlockHandle | |
120 | *status = Status::OK(); | |
121 | if (!start_.empty()) { | |
122 | GenerateFilter(); | |
123 | } | |
124 | ||
125 | // Append array of per-filter offsets | |
126 | const uint32_t array_offset = static_cast<uint32_t>(result_.size()); | |
127 | for (size_t i = 0; i < filter_offsets_.size(); i++) { | |
128 | PutFixed32(&result_, filter_offsets_[i]); | |
129 | } | |
130 | ||
131 | PutFixed32(&result_, array_offset); | |
132 | result_.push_back(kFilterBaseLg); // Save encoding parameter in result | |
133 | return Slice(result_); | |
134 | } | |
135 | ||
// Builds one filter from all keys accumulated since the previous call,
// appends its bytes to result_, and records its starting offset. With no
// pending keys only the offset is recorded (an empty filter slot), which
// keeps the offset array aligned with block indices.
void BlockBasedFilterBlockBuilder::GenerateFilter() {
  const size_t num_entries = start_.size();
  if (num_entries == 0) {
    // Fast path if there are no keys for this filter
    filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
    return;
  }

  // Make list of keys from flattened key structure. Pushing entries_.size()
  // as a sentinel lets every key length be start_[i + 1] - start_[i].
  start_.push_back(entries_.size());  // Simplify length computation
  tmp_entries_.resize(num_entries);
  for (size_t i = 0; i < num_entries; i++) {
    const char* base = entries_.data() + start_[i];
    size_t length = start_[i + 1] - start_[i];
    tmp_entries_[i] = Slice(base, length);
  }

  // Generate filter for current set of keys and append to result_.
  filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
  policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries),
                        &result_);

  // Reset per-filter state. The prefix-dedupe cache (prev_prefix_*) points
  // into entries_, which is cleared here, so it must be invalidated too.
  tmp_entries_.clear();
  entries_.clear();
  start_.clear();
  prev_prefix_start_ = 0;
  prev_prefix_size_ = 0;
}
164 | ||
// Wraps a (possibly empty) filter block for table `t`. The table is held
// by pointer and is expected to outlive this reader; it must have a filter
// policy configured.
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
    const BlockBasedTable* t, CachableEntry<BlockContents>&& filter_block)
    : FilterBlockReaderCommon(t, std::move(filter_block)) {
  assert(table());
  assert(table()->get_rep());
  assert(table()->get_rep()->filter_policy);
}
172 | ||
173 | std::unique_ptr<FilterBlockReader> BlockBasedFilterBlockReader::Create( | |
20effc67 TL |
174 | const BlockBasedTable* table, const ReadOptions& ro, |
175 | FilePrefetchBuffer* prefetch_buffer, bool use_cache, bool prefetch, | |
176 | bool pin, BlockCacheLookupContext* lookup_context) { | |
f67539c2 TL |
177 | assert(table); |
178 | assert(table->get_rep()); | |
179 | assert(!pin || prefetch); | |
180 | ||
181 | CachableEntry<BlockContents> filter_block; | |
182 | if (prefetch || !use_cache) { | |
20effc67 TL |
183 | const Status s = ReadFilterBlock(table, prefetch_buffer, ro, use_cache, |
184 | nullptr /* get_context */, lookup_context, | |
185 | &filter_block); | |
f67539c2 | 186 | if (!s.ok()) { |
20effc67 | 187 | IGNORE_STATUS_IF_ERROR(s); |
f67539c2 TL |
188 | return std::unique_ptr<FilterBlockReader>(); |
189 | } | |
190 | ||
191 | if (use_cache && !pin) { | |
192 | filter_block.Reset(); | |
193 | } | |
194 | } | |
195 | ||
196 | return std::unique_ptr<FilterBlockReader>( | |
197 | new BlockBasedFilterBlockReader(table, std::move(filter_block))); | |
7c673cae FG |
198 | } |
199 | ||
200 | bool BlockBasedFilterBlockReader::KeyMayMatch( | |
11fdf7f2 | 201 | const Slice& key, const SliceTransform* /* prefix_extractor */, |
f67539c2 TL |
202 | uint64_t block_offset, const bool no_io, |
203 | const Slice* const /*const_ikey_ptr*/, GetContext* get_context, | |
204 | BlockCacheLookupContext* lookup_context) { | |
7c673cae | 205 | assert(block_offset != kNotValid); |
f67539c2 | 206 | if (!whole_key_filtering()) { |
7c673cae FG |
207 | return true; |
208 | } | |
f67539c2 | 209 | return MayMatch(key, block_offset, no_io, get_context, lookup_context); |
7c673cae FG |
210 | } |
211 | ||
// Returns true when a key with prefix `prefix` may be present in the data
// block starting at `block_offset`; false only on a definite filter miss.
// Unlike KeyMayMatch(), prefixes are always probed — there is no
// whole-key-filtering gate here.
bool BlockBasedFilterBlockReader::PrefixMayMatch(
    const Slice& prefix, const SliceTransform* /* prefix_extractor */,
    uint64_t block_offset, const bool no_io,
    const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
    BlockCacheLookupContext* lookup_context) {
  // This reader requires a concrete data-block offset.
  assert(block_offset != kNotValid);
  return MayMatch(prefix, block_offset, no_io, get_context, lookup_context);
}
220 | ||
221 | bool BlockBasedFilterBlockReader::ParseFieldsFromBlock( | |
222 | const BlockContents& contents, const char** data, const char** offset, | |
223 | size_t* num, size_t* base_lg) { | |
224 | assert(data); | |
225 | assert(offset); | |
226 | assert(num); | |
227 | assert(base_lg); | |
228 | ||
229 | const size_t n = contents.data.size(); | |
230 | if (n < 5) { // 1 byte for base_lg and 4 for start of offset array | |
231 | return false; | |
232 | } | |
233 | ||
234 | const uint32_t last_word = DecodeFixed32(contents.data.data() + n - 5); | |
235 | if (last_word > n - 5) { | |
236 | return false; | |
237 | } | |
238 | ||
239 | *data = contents.data.data(); | |
240 | *offset = (*data) + last_word; | |
241 | *num = (n - 5 - last_word) / 4; | |
242 | *base_lg = contents.data[n - 1]; | |
243 | ||
244 | return true; | |
245 | } | |
246 | ||
// Core filter probe: locates the filter covering `block_offset` and asks
// the policy whether `entry` may be present. Any failure to obtain or
// decode the filter degrades to "may match" (true) — the filter must never
// produce a false negative.
bool BlockBasedFilterBlockReader::MayMatch(
    const Slice& entry, uint64_t block_offset, bool no_io,
    GetContext* get_context, BlockCacheLookupContext* lookup_context) const {
  CachableEntry<BlockContents> filter_block;

  const Status s =
      GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block);
  if (!s.ok()) {
    return true;
  }

  assert(filter_block.GetValue());

  const char* data = nullptr;
  const char* offset = nullptr;
  size_t num = 0;
  size_t base_lg = 0;
  if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num,
                            &base_lg)) {
    return true;  // Errors are treated as potential matches
  }

  // Each filter covers (1 << base_lg) bytes of data-block offset space.
  const uint64_t index = block_offset >> base_lg;
  if (index < num) {
    // [start, limit) delimits this filter's bytes. Reading the word after
    // the last offset entry is safe even for index == num - 1: the builder
    // writes the offset-array position there (see Finish()).
    const uint32_t start = DecodeFixed32(offset + index * 4);
    const uint32_t limit = DecodeFixed32(offset + index * 4 + 4);
    if (start <= limit && limit <= (uint32_t)(offset - data)) {
      const Slice filter = Slice(data + start, limit - start);

      assert(table());
      assert(table()->get_rep());
      const FilterPolicy* const policy = table()->get_rep()->filter_policy;

      const bool may_match = policy->KeyMayMatch(entry, filter);
      if (may_match) {
        PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
        return true;
      } else {
        PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
        return false;
      }
    } else if (start == limit) {
      // Empty filters do not match any entries
      return false;
    }
  }
  return true;  // Errors are treated as potential matches
}
295 | ||
// Approximate heap footprint of this reader: the retained filter block
// (via the common base class) plus this object itself — using
// malloc_usable_size for the actual allocation size where available,
// otherwise sizeof.
size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
  size_t usage = ApproximateFilterBlockMemoryUsage();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
  usage += malloc_usable_size(const_cast<BlockBasedFilterBlockReader*>(this));
#else
  usage += sizeof(*this);
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
  return usage;
}
305 | ||
306 | std::string BlockBasedFilterBlockReader::ToString() const { | |
f67539c2 TL |
307 | CachableEntry<BlockContents> filter_block; |
308 | ||
309 | const Status s = | |
310 | GetOrReadFilterBlock(false /* no_io */, nullptr /* get_context */, | |
311 | nullptr /* lookup_context */, &filter_block); | |
312 | if (!s.ok()) { | |
313 | return std::string("Unable to retrieve filter block"); | |
314 | } | |
315 | ||
316 | assert(filter_block.GetValue()); | |
317 | ||
318 | const char* data = nullptr; | |
319 | const char* offset = nullptr; | |
320 | size_t num = 0; | |
321 | size_t base_lg = 0; | |
322 | if (!ParseFieldsFromBlock(*filter_block.GetValue(), &data, &offset, &num, | |
323 | &base_lg)) { | |
324 | return std::string("Error parsing filter block"); | |
325 | } | |
326 | ||
11fdf7f2 | 327 | std::string result; |
7c673cae FG |
328 | result.reserve(1024); |
329 | ||
330 | std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks"); | |
f67539c2 | 331 | AppendItem(&result, s_fb, ROCKSDB_NAMESPACE::ToString(num)); |
7c673cae FG |
332 | AppendItem(&result, s_bo, s_hd); |
333 | ||
f67539c2 TL |
334 | for (size_t index = 0; index < num; index++) { |
335 | uint32_t start = DecodeFixed32(offset + index * 4); | |
336 | uint32_t limit = DecodeFixed32(offset + index * 4 + 4); | |
7c673cae FG |
337 | |
338 | if (start != limit) { | |
f67539c2 TL |
339 | result.append(" filter block # " + |
340 | ROCKSDB_NAMESPACE::ToString(index + 1) + "\n"); | |
341 | Slice filter = Slice(data + start, limit - start); | |
7c673cae FG |
342 | AppendItem(&result, start, filter.ToString(true)); |
343 | } | |
344 | } | |
345 | return result; | |
346 | } | |
f67539c2 TL |
347 | |
348 | } // namespace ROCKSDB_NAMESPACE |