]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #include "table/block_based_filter_block.h" | |
11 | #include <algorithm> | |
12 | ||
13 | #include "db/dbformat.h" | |
14 | #include "monitoring/perf_context_imp.h" | |
15 | #include "rocksdb/filter_policy.h" | |
16 | #include "util/coding.h" | |
17 | #include "util/string_util.h" | |
18 | ||
19 | namespace rocksdb { | |
20 | ||
21 | namespace { | |
22 | ||
23 | void AppendItem(std::string* props, const std::string& key, | |
24 | const std::string& value) { | |
25 | char cspace = ' '; | |
26 | std::string value_str(""); | |
27 | size_t i = 0; | |
28 | const size_t dataLength = 64; | |
29 | const size_t tabLength = 2; | |
30 | const size_t offLength = 16; | |
31 | ||
32 | value_str.append(&value[i], std::min(size_t(dataLength), value.size())); | |
33 | i += dataLength; | |
34 | while (i < value.size()) { | |
35 | value_str.append("\n"); | |
36 | value_str.append(offLength, cspace); | |
37 | value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i)); | |
38 | i += dataLength; | |
39 | } | |
40 | ||
41 | std::string result(""); | |
42 | if (key.size() < (offLength - tabLength)) | |
43 | result.append(size_t((offLength - tabLength)) - key.size(), cspace); | |
44 | result.append(key); | |
45 | ||
46 | props->append(result + ": " + value_str + "\n"); | |
47 | } | |
48 | ||
49 | template <class TKey> | |
50 | void AppendItem(std::string* props, const TKey& key, const std::string& value) { | |
51 | std::string key_str = rocksdb::ToString(key); | |
52 | AppendItem(props, key_str, value); | |
53 | } | |
54 | } // namespace | |
55 | ||
56 | ||
57 | // See doc/table_format.txt for an explanation of the filter block format. | |
58 | ||
59 | // Generate new filter every 2KB of data | |
60 | static const size_t kFilterBaseLg = 11; | |
61 | static const size_t kFilterBase = 1 << kFilterBaseLg; | |
62 | ||
63 | BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder( | |
64 | const SliceTransform* prefix_extractor, | |
65 | const BlockBasedTableOptions& table_opt) | |
66 | : policy_(table_opt.filter_policy.get()), | |
67 | prefix_extractor_(prefix_extractor), | |
68 | whole_key_filtering_(table_opt.whole_key_filtering), | |
69 | prev_prefix_start_(0), | |
11fdf7f2 TL |
70 | prev_prefix_size_(0), |
71 | num_added_(0) { | |
7c673cae FG |
72 | assert(policy_); |
73 | } | |
74 | ||
75 | void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) { | |
76 | uint64_t filter_index = (block_offset / kFilterBase); | |
77 | assert(filter_index >= filter_offsets_.size()); | |
78 | while (filter_index > filter_offsets_.size()) { | |
79 | GenerateFilter(); | |
80 | } | |
81 | } | |
82 | ||
83 | void BlockBasedFilterBlockBuilder::Add(const Slice& key) { | |
84 | if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { | |
85 | AddPrefix(key); | |
86 | } | |
87 | ||
88 | if (whole_key_filtering_) { | |
89 | AddKey(key); | |
90 | } | |
91 | } | |
92 | ||
93 | // Add key to filter if needed | |
94 | inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) { | |
11fdf7f2 | 95 | num_added_++; |
7c673cae FG |
96 | start_.push_back(entries_.size()); |
97 | entries_.append(key.data(), key.size()); | |
98 | } | |
99 | ||
100 | // Add prefix to filter if needed | |
101 | inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) { | |
102 | // get slice for most recently added entry | |
103 | Slice prev; | |
104 | if (prev_prefix_size_ > 0) { | |
105 | prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_); | |
106 | } | |
107 | ||
108 | Slice prefix = prefix_extractor_->Transform(key); | |
109 | // insert prefix only when it's different from the previous prefix. | |
110 | if (prev.size() == 0 || prefix != prev) { | |
7c673cae FG |
111 | prev_prefix_start_ = entries_.size(); |
112 | prev_prefix_size_ = prefix.size(); | |
11fdf7f2 | 113 | AddKey(prefix); |
7c673cae FG |
114 | } |
115 | } | |
116 | ||
11fdf7f2 | 117 | Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, |
7c673cae FG |
118 | Status* status) { |
119 | // In this impl we ignore BlockHandle | |
120 | *status = Status::OK(); | |
121 | if (!start_.empty()) { | |
122 | GenerateFilter(); | |
123 | } | |
124 | ||
125 | // Append array of per-filter offsets | |
126 | const uint32_t array_offset = static_cast<uint32_t>(result_.size()); | |
127 | for (size_t i = 0; i < filter_offsets_.size(); i++) { | |
128 | PutFixed32(&result_, filter_offsets_[i]); | |
129 | } | |
130 | ||
131 | PutFixed32(&result_, array_offset); | |
132 | result_.push_back(kFilterBaseLg); // Save encoding parameter in result | |
133 | return Slice(result_); | |
134 | } | |
135 | ||
136 | void BlockBasedFilterBlockBuilder::GenerateFilter() { | |
137 | const size_t num_entries = start_.size(); | |
138 | if (num_entries == 0) { | |
139 | // Fast path if there are no keys for this filter | |
140 | filter_offsets_.push_back(static_cast<uint32_t>(result_.size())); | |
141 | return; | |
142 | } | |
143 | ||
144 | // Make list of keys from flattened key structure | |
145 | start_.push_back(entries_.size()); // Simplify length computation | |
146 | tmp_entries_.resize(num_entries); | |
147 | for (size_t i = 0; i < num_entries; i++) { | |
148 | const char* base = entries_.data() + start_[i]; | |
149 | size_t length = start_[i + 1] - start_[i]; | |
150 | tmp_entries_[i] = Slice(base, length); | |
151 | } | |
152 | ||
153 | // Generate filter for current set of keys and append to result_. | |
154 | filter_offsets_.push_back(static_cast<uint32_t>(result_.size())); | |
155 | policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries), | |
156 | &result_); | |
157 | ||
158 | tmp_entries_.clear(); | |
159 | entries_.clear(); | |
160 | start_.clear(); | |
161 | prev_prefix_start_ = 0; | |
162 | prev_prefix_size_ = 0; | |
163 | } | |
164 | ||
165 | BlockBasedFilterBlockReader::BlockBasedFilterBlockReader( | |
166 | const SliceTransform* prefix_extractor, | |
167 | const BlockBasedTableOptions& table_opt, bool _whole_key_filtering, | |
168 | BlockContents&& contents, Statistics* stats) | |
169 | : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering), | |
170 | policy_(table_opt.filter_policy.get()), | |
171 | prefix_extractor_(prefix_extractor), | |
172 | data_(nullptr), | |
173 | offset_(nullptr), | |
174 | num_(0), | |
175 | base_lg_(0), | |
176 | contents_(std::move(contents)) { | |
177 | assert(policy_); | |
178 | size_t n = contents_.data.size(); | |
179 | if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array | |
180 | base_lg_ = contents_.data[n - 1]; | |
181 | uint32_t last_word = DecodeFixed32(contents_.data.data() + n - 5); | |
182 | if (last_word > n - 5) return; | |
183 | data_ = contents_.data.data(); | |
184 | offset_ = data_ + last_word; | |
185 | num_ = (n - 5 - last_word) / 4; | |
186 | } | |
187 | ||
188 | bool BlockBasedFilterBlockReader::KeyMayMatch( | |
11fdf7f2 TL |
189 | const Slice& key, const SliceTransform* /* prefix_extractor */, |
190 | uint64_t block_offset, const bool /*no_io*/, | |
191 | const Slice* const /*const_ikey_ptr*/) { | |
7c673cae FG |
192 | assert(block_offset != kNotValid); |
193 | if (!whole_key_filtering_) { | |
194 | return true; | |
195 | } | |
196 | return MayMatch(key, block_offset); | |
197 | } | |
198 | ||
199 | bool BlockBasedFilterBlockReader::PrefixMayMatch( | |
11fdf7f2 TL |
200 | const Slice& prefix, const SliceTransform* /* prefix_extractor */, |
201 | uint64_t block_offset, const bool /*no_io*/, | |
202 | const Slice* const /*const_ikey_ptr*/) { | |
7c673cae | 203 | assert(block_offset != kNotValid); |
7c673cae FG |
204 | return MayMatch(prefix, block_offset); |
205 | } | |
206 | ||
207 | bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry, | |
208 | uint64_t block_offset) { | |
209 | uint64_t index = block_offset >> base_lg_; | |
210 | if (index < num_) { | |
211 | uint32_t start = DecodeFixed32(offset_ + index * 4); | |
212 | uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); | |
213 | if (start <= limit && limit <= (uint32_t)(offset_ - data_)) { | |
214 | Slice filter = Slice(data_ + start, limit - start); | |
215 | bool const may_match = policy_->KeyMayMatch(entry, filter); | |
216 | if (may_match) { | |
217 | PERF_COUNTER_ADD(bloom_sst_hit_count, 1); | |
218 | return true; | |
219 | } else { | |
220 | PERF_COUNTER_ADD(bloom_sst_miss_count, 1); | |
221 | return false; | |
222 | } | |
223 | } else if (start == limit) { | |
224 | // Empty filters do not match any entries | |
225 | return false; | |
226 | } | |
227 | } | |
228 | return true; // Errors are treated as potential matches | |
229 | } | |
230 | ||
231 | size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const { | |
232 | return num_ * 4 + 5 + (offset_ - data_); | |
233 | } | |
234 | ||
235 | std::string BlockBasedFilterBlockReader::ToString() const { | |
11fdf7f2 | 236 | std::string result; |
7c673cae FG |
237 | result.reserve(1024); |
238 | ||
239 | std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks"); | |
240 | AppendItem(&result, s_fb, rocksdb::ToString(num_)); | |
241 | AppendItem(&result, s_bo, s_hd); | |
242 | ||
243 | for (size_t index = 0; index < num_; index++) { | |
244 | uint32_t start = DecodeFixed32(offset_ + index * 4); | |
245 | uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); | |
246 | ||
247 | if (start != limit) { | |
248 | result.append(" filter block # " + rocksdb::ToString(index + 1) + "\n"); | |
249 | Slice filter = Slice(data_ + start, limit - start); | |
250 | AppendItem(&result, start, filter.ToString(true)); | |
251 | } | |
252 | } | |
253 | return result; | |
254 | } | |
255 | } // namespace rocksdb |