]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/block_based_filter_block.cc
bump version to 12.2.12-pve1
[ceph.git] / ceph / src / rocksdb / table / block_based_filter_block.cc
CommitLineData
7c673cae
FG
1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2// This source code is licensed under the BSD-style license found in the
3// LICENSE file in the root directory of this source tree. An additional grant
4// of patent rights can be found in the PATENTS file in the same directory.
5//
6// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#include "table/block_based_filter_block.h"
11#include <algorithm>
12
13#include "db/dbformat.h"
14#include "monitoring/perf_context_imp.h"
15#include "rocksdb/filter_policy.h"
16#include "util/coding.h"
17#include "util/string_util.h"
18
19namespace rocksdb {
20
21namespace {
22
// Appends "KEY: VALUE\n" to *props. KEY is right-aligned in a 14-character
// column; VALUE is wrapped every 64 characters, with continuation lines
// indented by 16 spaces so they line up under the first value character.
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  const size_t kDataWidth = 64;  // value characters per line
  const size_t kTabWidth = 2;
  const size_t kOffWidth = 16;   // indent width for wrapped value lines

  // Build the (possibly multi-line) value column.
  std::string wrapped;
  wrapped.append(value, 0, std::min(kDataWidth, value.size()));
  for (size_t pos = kDataWidth; pos < value.size(); pos += kDataWidth) {
    wrapped.push_back('\n');
    wrapped.append(kOffWidth, ' ');
    wrapped.append(value, pos, std::min(kDataWidth, value.size() - pos));
  }

  // Right-align the key within (kOffWidth - kTabWidth) columns.
  std::string header;
  if (key.size() < kOffWidth - kTabWidth) {
    header.assign(kOffWidth - kTabWidth - key.size(), ' ');
  }
  header += key;

  props->append(header + ": " + wrapped + "\n");
}
48
49template <class TKey>
50void AppendItem(std::string* props, const TKey& key, const std::string& value) {
51 std::string key_str = rocksdb::ToString(key);
52 AppendItem(props, key_str, value);
53}
54} // namespace
55
56
// See doc/table_format.txt for an explanation of the filter block format.

// Generate a new filter every 2KB (1 << kFilterBaseLg bytes) of data.
// kFilterBaseLg is also written as the final trailer byte of the filter
// block (see Finish) so the reader can recover the granularity.
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;
62
// Builder constructor: caches the filter policy and (optional) prefix
// extractor from the table options.  table_opt.filter_policy must be
// non-null; prev_prefix_* track the most recently added prefix so that
// consecutive duplicate prefixes are stored only once (see AddPrefix).
BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt)
    : policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      whole_key_filtering_(table_opt.whole_key_filtering),
      prev_prefix_start_(0),
      prev_prefix_size_(0) {
  assert(policy_);
}
73
74void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
75 uint64_t filter_index = (block_offset / kFilterBase);
76 assert(filter_index >= filter_offsets_.size());
77 while (filter_index > filter_offsets_.size()) {
78 GenerateFilter();
79 }
80}
81
82void BlockBasedFilterBlockBuilder::Add(const Slice& key) {
83 if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
84 AddPrefix(key);
85 }
86
87 if (whole_key_filtering_) {
88 AddKey(key);
89 }
90}
91
92// Add key to filter if needed
93inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
94 start_.push_back(entries_.size());
95 entries_.append(key.data(), key.size());
96}
97
98// Add prefix to filter if needed
99inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
100 // get slice for most recently added entry
101 Slice prev;
102 if (prev_prefix_size_ > 0) {
103 prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_);
104 }
105
106 Slice prefix = prefix_extractor_->Transform(key);
107 // insert prefix only when it's different from the previous prefix.
108 if (prev.size() == 0 || prefix != prev) {
109 start_.push_back(entries_.size());
110 prev_prefix_start_ = entries_.size();
111 prev_prefix_size_ = prefix.size();
112 entries_.append(prefix.data(), prefix.size());
113 }
114}
115
116Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& tmp,
117 Status* status) {
118 // In this impl we ignore BlockHandle
119 *status = Status::OK();
120 if (!start_.empty()) {
121 GenerateFilter();
122 }
123
124 // Append array of per-filter offsets
125 const uint32_t array_offset = static_cast<uint32_t>(result_.size());
126 for (size_t i = 0; i < filter_offsets_.size(); i++) {
127 PutFixed32(&result_, filter_offsets_[i]);
128 }
129
130 PutFixed32(&result_, array_offset);
131 result_.push_back(kFilterBaseLg); // Save encoding parameter in result
132 return Slice(result_);
133}
134
// Builds one filter from every key/prefix accumulated since the previous
// filter, appends it to result_, and records its starting offset in
// filter_offsets_.  With no pending entries it records an empty filter
// (an offset with no filter bytes).  Resets all per-filter state at the end.
void BlockBasedFilterBlockBuilder::GenerateFilter() {
  const size_t num_entries = start_.size();
  if (num_entries == 0) {
    // Fast path if there are no keys for this filter
    filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
    return;
  }

  // Make list of keys from flattened key structure
  start_.push_back(entries_.size());  // Simplify length computation
  tmp_entries_.resize(num_entries);
  for (size_t i = 0; i < num_entries; i++) {
    // Entry i spans [start_[i], start_[i+1]) within entries_.
    const char* base = entries_.data() + start_[i];
    size_t length = start_[i + 1] - start_[i];
    tmp_entries_[i] = Slice(base, length);
  }

  // Generate filter for current set of keys and append to result_.
  filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
  policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries),
                        &result_);

  // Reset accumulation state for the next filter.
  tmp_entries_.clear();
  entries_.clear();
  start_.clear();
  prev_prefix_start_ = 0;
  prev_prefix_size_ = 0;
}
163
// Reader constructor: parses the block produced by the builder, laid out as
//   [filter data][fixed32 filter offsets][fixed32 array offset][base_lg byte]
// On a truncated or malformed trailer the reader is left with num_ == 0,
// so lookups conservatively report "may match" (see MayMatch).
BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
    const SliceTransform* prefix_extractor,
    const BlockBasedTableOptions& table_opt, bool _whole_key_filtering,
    BlockContents&& contents, Statistics* stats)
    : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
      policy_(table_opt.filter_policy.get()),
      prefix_extractor_(prefix_extractor),
      data_(nullptr),
      offset_(nullptr),
      num_(0),
      base_lg_(0),
      contents_(std::move(contents)) {
  assert(policy_);
  size_t n = contents_.data.size();
  if (n < 5) return;  // 1 byte for base_lg_ and 4 for start of offset array
  base_lg_ = contents_.data[n - 1];
  // last_word is the position where the offset array begins; it must lie
  // within the block, before the 5-byte trailer.
  uint32_t last_word = DecodeFixed32(contents_.data.data() + n - 5);
  if (last_word > n - 5) return;
  data_ = contents_.data.data();
  offset_ = data_ + last_word;
  num_ = (n - 5 - last_word) / 4;
}
186
187bool BlockBasedFilterBlockReader::KeyMayMatch(
188 const Slice& key, uint64_t block_offset, const bool no_io,
189 const Slice* const const_ikey_ptr) {
190 assert(block_offset != kNotValid);
191 if (!whole_key_filtering_) {
192 return true;
193 }
194 return MayMatch(key, block_offset);
195}
196
197bool BlockBasedFilterBlockReader::PrefixMayMatch(
198 const Slice& prefix, uint64_t block_offset, const bool no_io,
199 const Slice* const const_ikey_ptr) {
200 assert(block_offset != kNotValid);
201 if (!prefix_extractor_) {
202 return true;
203 }
204 return MayMatch(prefix, block_offset);
205}
206
// Core lookup: locates the filter responsible for block_offset and probes
// it for entry.  Returns false only when the filter proves absence; any
// out-of-range index or malformed offset data degrades to "may match".
bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
                                           uint64_t block_offset) {
  uint64_t index = block_offset >> base_lg_;
  if (index < num_) {
    // Slot i holds the filter's start; the next fixed32 (for the last
    // filter, the array-offset word written by the builder) is its end.
    uint32_t start = DecodeFixed32(offset_ + index * 4);
    uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
    if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
      Slice filter = Slice(data_ + start, limit - start);
      bool const may_match = policy_->KeyMayMatch(entry, filter);
      if (may_match) {
        PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
        return true;
      } else {
        PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
        return false;
      }
    } else if (start == limit) {
      // Empty filters do not match any entries
      return false;
    }
  }
  return true;  // Errors are treated as potential matches
}
230
231size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
232 return num_ * 4 + 5 + (offset_ - data_);
233}
234
235std::string BlockBasedFilterBlockReader::ToString() const {
236 std::string result, filter_meta;
237 result.reserve(1024);
238
239 std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
240 AppendItem(&result, s_fb, rocksdb::ToString(num_));
241 AppendItem(&result, s_bo, s_hd);
242
243 for (size_t index = 0; index < num_; index++) {
244 uint32_t start = DecodeFixed32(offset_ + index * 4);
245 uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
246
247 if (start != limit) {
248 result.append(" filter block # " + rocksdb::ToString(index + 1) + "\n");
249 Slice filter = Slice(data_ + start, limit - start);
250 AppendItem(&result, start, filter.ToString(true));
251 }
252 }
253 return result;
254}
255} // namespace rocksdb