]>
Commit | Line | Data |
---|---|---|
7c673cae | 1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
11fdf7f2 TL |
2 | // This source code is licensed under both the GPLv2 (found in the |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
7c673cae FG |
5 | // |
6 | // Copyright (c) 2012 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #include "table/block_based_filter_block.h" | |
11 | #include <algorithm> | |
12 | ||
13 | #include "db/dbformat.h" | |
14 | #include "monitoring/perf_context_imp.h" | |
15 | #include "rocksdb/filter_policy.h" | |
16 | #include "util/coding.h" | |
17 | #include "util/string_util.h" | |
18 | ||
19 | namespace rocksdb { | |
20 | ||
21 | namespace { | |
22 | ||
23 | void AppendItem(std::string* props, const std::string& key, | |
24 | const std::string& value) { | |
25 | char cspace = ' '; | |
26 | std::string value_str(""); | |
27 | size_t i = 0; | |
28 | const size_t dataLength = 64; | |
29 | const size_t tabLength = 2; | |
30 | const size_t offLength = 16; | |
31 | ||
32 | value_str.append(&value[i], std::min(size_t(dataLength), value.size())); | |
33 | i += dataLength; | |
34 | while (i < value.size()) { | |
35 | value_str.append("\n"); | |
36 | value_str.append(offLength, cspace); | |
37 | value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i)); | |
38 | i += dataLength; | |
39 | } | |
40 | ||
41 | std::string result(""); | |
42 | if (key.size() < (offLength - tabLength)) | |
43 | result.append(size_t((offLength - tabLength)) - key.size(), cspace); | |
44 | result.append(key); | |
45 | ||
46 | props->append(result + ": " + value_str + "\n"); | |
47 | } | |
48 | ||
49 | template <class TKey> | |
50 | void AppendItem(std::string* props, const TKey& key, const std::string& value) { | |
51 | std::string key_str = rocksdb::ToString(key); | |
52 | AppendItem(props, key_str, value); | |
53 | } | |
54 | } // namespace | |
55 | ||
7c673cae FG |
56 | // See doc/table_format.txt for an explanation of the filter block format. |
57 | ||
58 | // Generate new filter every 2KB of data | |
59 | static const size_t kFilterBaseLg = 11; | |
60 | static const size_t kFilterBase = 1 << kFilterBaseLg; | |
61 | ||
62 | BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder( | |
63 | const SliceTransform* prefix_extractor, | |
64 | const BlockBasedTableOptions& table_opt) | |
65 | : policy_(table_opt.filter_policy.get()), | |
66 | prefix_extractor_(prefix_extractor), | |
67 | whole_key_filtering_(table_opt.whole_key_filtering), | |
68 | prev_prefix_start_(0), | |
11fdf7f2 TL |
69 | prev_prefix_size_(0), |
70 | num_added_(0) { | |
7c673cae FG |
71 | assert(policy_); |
72 | } | |
73 | ||
74 | void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) { | |
75 | uint64_t filter_index = (block_offset / kFilterBase); | |
76 | assert(filter_index >= filter_offsets_.size()); | |
77 | while (filter_index > filter_offsets_.size()) { | |
78 | GenerateFilter(); | |
79 | } | |
80 | } | |
81 | ||
82 | void BlockBasedFilterBlockBuilder::Add(const Slice& key) { | |
83 | if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { | |
84 | AddPrefix(key); | |
85 | } | |
86 | ||
87 | if (whole_key_filtering_) { | |
88 | AddKey(key); | |
89 | } | |
90 | } | |
91 | ||
92 | // Add key to filter if needed | |
93 | inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) { | |
11fdf7f2 | 94 | num_added_++; |
7c673cae FG |
95 | start_.push_back(entries_.size()); |
96 | entries_.append(key.data(), key.size()); | |
97 | } | |
98 | ||
99 | // Add prefix to filter if needed | |
100 | inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) { | |
101 | // get slice for most recently added entry | |
102 | Slice prev; | |
103 | if (prev_prefix_size_ > 0) { | |
104 | prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_); | |
105 | } | |
106 | ||
107 | Slice prefix = prefix_extractor_->Transform(key); | |
108 | // insert prefix only when it's different from the previous prefix. | |
109 | if (prev.size() == 0 || prefix != prev) { | |
7c673cae FG |
110 | prev_prefix_start_ = entries_.size(); |
111 | prev_prefix_size_ = prefix.size(); | |
11fdf7f2 | 112 | AddKey(prefix); |
7c673cae FG |
113 | } |
114 | } | |
115 | ||
11fdf7f2 | 116 | Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/, |
7c673cae FG |
117 | Status* status) { |
118 | // In this impl we ignore BlockHandle | |
119 | *status = Status::OK(); | |
120 | if (!start_.empty()) { | |
121 | GenerateFilter(); | |
122 | } | |
123 | ||
124 | // Append array of per-filter offsets | |
125 | const uint32_t array_offset = static_cast<uint32_t>(result_.size()); | |
126 | for (size_t i = 0; i < filter_offsets_.size(); i++) { | |
127 | PutFixed32(&result_, filter_offsets_[i]); | |
128 | } | |
129 | ||
130 | PutFixed32(&result_, array_offset); | |
131 | result_.push_back(kFilterBaseLg); // Save encoding parameter in result | |
132 | return Slice(result_); | |
133 | } | |
134 | ||
135 | void BlockBasedFilterBlockBuilder::GenerateFilter() { | |
136 | const size_t num_entries = start_.size(); | |
137 | if (num_entries == 0) { | |
138 | // Fast path if there are no keys for this filter | |
139 | filter_offsets_.push_back(static_cast<uint32_t>(result_.size())); | |
140 | return; | |
141 | } | |
142 | ||
143 | // Make list of keys from flattened key structure | |
144 | start_.push_back(entries_.size()); // Simplify length computation | |
145 | tmp_entries_.resize(num_entries); | |
146 | for (size_t i = 0; i < num_entries; i++) { | |
147 | const char* base = entries_.data() + start_[i]; | |
148 | size_t length = start_[i + 1] - start_[i]; | |
149 | tmp_entries_[i] = Slice(base, length); | |
150 | } | |
151 | ||
152 | // Generate filter for current set of keys and append to result_. | |
153 | filter_offsets_.push_back(static_cast<uint32_t>(result_.size())); | |
154 | policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries), | |
155 | &result_); | |
156 | ||
157 | tmp_entries_.clear(); | |
158 | entries_.clear(); | |
159 | start_.clear(); | |
160 | prev_prefix_start_ = 0; | |
161 | prev_prefix_size_ = 0; | |
162 | } | |
163 | ||
164 | BlockBasedFilterBlockReader::BlockBasedFilterBlockReader( | |
165 | const SliceTransform* prefix_extractor, | |
166 | const BlockBasedTableOptions& table_opt, bool _whole_key_filtering, | |
167 | BlockContents&& contents, Statistics* stats) | |
168 | : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering), | |
169 | policy_(table_opt.filter_policy.get()), | |
170 | prefix_extractor_(prefix_extractor), | |
171 | data_(nullptr), | |
172 | offset_(nullptr), | |
173 | num_(0), | |
174 | base_lg_(0), | |
175 | contents_(std::move(contents)) { | |
176 | assert(policy_); | |
177 | size_t n = contents_.data.size(); | |
178 | if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array | |
179 | base_lg_ = contents_.data[n - 1]; | |
180 | uint32_t last_word = DecodeFixed32(contents_.data.data() + n - 5); | |
181 | if (last_word > n - 5) return; | |
182 | data_ = contents_.data.data(); | |
183 | offset_ = data_ + last_word; | |
184 | num_ = (n - 5 - last_word) / 4; | |
185 | } | |
186 | ||
187 | bool BlockBasedFilterBlockReader::KeyMayMatch( | |
11fdf7f2 TL |
188 | const Slice& key, const SliceTransform* /* prefix_extractor */, |
189 | uint64_t block_offset, const bool /*no_io*/, | |
190 | const Slice* const /*const_ikey_ptr*/) { | |
7c673cae FG |
191 | assert(block_offset != kNotValid); |
192 | if (!whole_key_filtering_) { | |
193 | return true; | |
194 | } | |
195 | return MayMatch(key, block_offset); | |
196 | } | |
197 | ||
198 | bool BlockBasedFilterBlockReader::PrefixMayMatch( | |
11fdf7f2 TL |
199 | const Slice& prefix, const SliceTransform* /* prefix_extractor */, |
200 | uint64_t block_offset, const bool /*no_io*/, | |
201 | const Slice* const /*const_ikey_ptr*/) { | |
7c673cae | 202 | assert(block_offset != kNotValid); |
7c673cae FG |
203 | return MayMatch(prefix, block_offset); |
204 | } | |
205 | ||
206 | bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry, | |
207 | uint64_t block_offset) { | |
208 | uint64_t index = block_offset >> base_lg_; | |
209 | if (index < num_) { | |
210 | uint32_t start = DecodeFixed32(offset_ + index * 4); | |
211 | uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); | |
212 | if (start <= limit && limit <= (uint32_t)(offset_ - data_)) { | |
213 | Slice filter = Slice(data_ + start, limit - start); | |
214 | bool const may_match = policy_->KeyMayMatch(entry, filter); | |
215 | if (may_match) { | |
216 | PERF_COUNTER_ADD(bloom_sst_hit_count, 1); | |
217 | return true; | |
218 | } else { | |
219 | PERF_COUNTER_ADD(bloom_sst_miss_count, 1); | |
220 | return false; | |
221 | } | |
222 | } else if (start == limit) { | |
223 | // Empty filters do not match any entries | |
224 | return false; | |
225 | } | |
226 | } | |
227 | return true; // Errors are treated as potential matches | |
228 | } | |
229 | ||
230 | size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const { | |
231 | return num_ * 4 + 5 + (offset_ - data_); | |
232 | } | |
233 | ||
234 | std::string BlockBasedFilterBlockReader::ToString() const { | |
11fdf7f2 | 235 | std::string result; |
7c673cae FG |
236 | result.reserve(1024); |
237 | ||
238 | std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks"); | |
239 | AppendItem(&result, s_fb, rocksdb::ToString(num_)); | |
240 | AppendItem(&result, s_bo, s_hd); | |
241 | ||
242 | for (size_t index = 0; index < num_; index++) { | |
243 | uint32_t start = DecodeFixed32(offset_ + index * 4); | |
244 | uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); | |
245 | ||
246 | if (start != limit) { | |
247 | result.append(" filter block # " + rocksdb::ToString(index + 1) + "\n"); | |
248 | Slice filter = Slice(data_ + start, limit - start); | |
249 | AppendItem(&result, start, filter.ToString(true)); | |
250 | } | |
251 | } | |
252 | return result; | |
253 | } | |
254 | } // namespace rocksdb |