]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/block_based_filter_block.cc
bump version to 15.2.11-pve1
[ceph.git] / ceph / src / rocksdb / table / block_based_filter_block.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#include "table/block_based_filter_block.h"
11#include <algorithm>
12
13#include "db/dbformat.h"
14#include "monitoring/perf_context_imp.h"
15#include "rocksdb/filter_policy.h"
16#include "util/coding.h"
17#include "util/string_util.h"
18
19namespace rocksdb {
20
21namespace {
22
23void AppendItem(std::string* props, const std::string& key,
24 const std::string& value) {
25 char cspace = ' ';
26 std::string value_str("");
27 size_t i = 0;
28 const size_t dataLength = 64;
29 const size_t tabLength = 2;
30 const size_t offLength = 16;
31
32 value_str.append(&value[i], std::min(size_t(dataLength), value.size()));
33 i += dataLength;
34 while (i < value.size()) {
35 value_str.append("\n");
36 value_str.append(offLength, cspace);
37 value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i));
38 i += dataLength;
39 }
40
41 std::string result("");
42 if (key.size() < (offLength - tabLength))
43 result.append(size_t((offLength - tabLength)) - key.size(), cspace);
44 result.append(key);
45
46 props->append(result + ": " + value_str + "\n");
47}
48
49template <class TKey>
50void AppendItem(std::string* props, const TKey& key, const std::string& value) {
51 std::string key_str = rocksdb::ToString(key);
52 AppendItem(props, key_str, value);
53}
54} // namespace
55
7c673cae
FG
56// See doc/table_format.txt for an explanation of the filter block format.
57
58// Generate new filter every 2KB of data
59static const size_t kFilterBaseLg = 11;
60static const size_t kFilterBase = 1 << kFilterBaseLg;
61
62BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
63 const SliceTransform* prefix_extractor,
64 const BlockBasedTableOptions& table_opt)
65 : policy_(table_opt.filter_policy.get()),
66 prefix_extractor_(prefix_extractor),
67 whole_key_filtering_(table_opt.whole_key_filtering),
68 prev_prefix_start_(0),
11fdf7f2
TL
69 prev_prefix_size_(0),
70 num_added_(0) {
7c673cae
FG
71 assert(policy_);
72}
73
74void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
75 uint64_t filter_index = (block_offset / kFilterBase);
76 assert(filter_index >= filter_offsets_.size());
77 while (filter_index > filter_offsets_.size()) {
78 GenerateFilter();
79 }
80}
81
82void BlockBasedFilterBlockBuilder::Add(const Slice& key) {
83 if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
84 AddPrefix(key);
85 }
86
87 if (whole_key_filtering_) {
88 AddKey(key);
89 }
90}
91
92// Add key to filter if needed
93inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
11fdf7f2 94 num_added_++;
7c673cae
FG
95 start_.push_back(entries_.size());
96 entries_.append(key.data(), key.size());
97}
98
99// Add prefix to filter if needed
100inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
101 // get slice for most recently added entry
102 Slice prev;
103 if (prev_prefix_size_ > 0) {
104 prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_);
105 }
106
107 Slice prefix = prefix_extractor_->Transform(key);
108 // insert prefix only when it's different from the previous prefix.
109 if (prev.size() == 0 || prefix != prev) {
7c673cae
FG
110 prev_prefix_start_ = entries_.size();
111 prev_prefix_size_ = prefix.size();
11fdf7f2 112 AddKey(prefix);
7c673cae
FG
113 }
114}
115
11fdf7f2 116Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
7c673cae
FG
117 Status* status) {
118 // In this impl we ignore BlockHandle
119 *status = Status::OK();
120 if (!start_.empty()) {
121 GenerateFilter();
122 }
123
124 // Append array of per-filter offsets
125 const uint32_t array_offset = static_cast<uint32_t>(result_.size());
126 for (size_t i = 0; i < filter_offsets_.size(); i++) {
127 PutFixed32(&result_, filter_offsets_[i]);
128 }
129
130 PutFixed32(&result_, array_offset);
131 result_.push_back(kFilterBaseLg); // Save encoding parameter in result
132 return Slice(result_);
133}
134
135void BlockBasedFilterBlockBuilder::GenerateFilter() {
136 const size_t num_entries = start_.size();
137 if (num_entries == 0) {
138 // Fast path if there are no keys for this filter
139 filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
140 return;
141 }
142
143 // Make list of keys from flattened key structure
144 start_.push_back(entries_.size()); // Simplify length computation
145 tmp_entries_.resize(num_entries);
146 for (size_t i = 0; i < num_entries; i++) {
147 const char* base = entries_.data() + start_[i];
148 size_t length = start_[i + 1] - start_[i];
149 tmp_entries_[i] = Slice(base, length);
150 }
151
152 // Generate filter for current set of keys and append to result_.
153 filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
154 policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries),
155 &result_);
156
157 tmp_entries_.clear();
158 entries_.clear();
159 start_.clear();
160 prev_prefix_start_ = 0;
161 prev_prefix_size_ = 0;
162}
163
164BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
165 const SliceTransform* prefix_extractor,
166 const BlockBasedTableOptions& table_opt, bool _whole_key_filtering,
167 BlockContents&& contents, Statistics* stats)
168 : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
169 policy_(table_opt.filter_policy.get()),
170 prefix_extractor_(prefix_extractor),
171 data_(nullptr),
172 offset_(nullptr),
173 num_(0),
174 base_lg_(0),
175 contents_(std::move(contents)) {
176 assert(policy_);
177 size_t n = contents_.data.size();
178 if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array
179 base_lg_ = contents_.data[n - 1];
180 uint32_t last_word = DecodeFixed32(contents_.data.data() + n - 5);
181 if (last_word > n - 5) return;
182 data_ = contents_.data.data();
183 offset_ = data_ + last_word;
184 num_ = (n - 5 - last_word) / 4;
185}
186
187bool BlockBasedFilterBlockReader::KeyMayMatch(
11fdf7f2
TL
188 const Slice& key, const SliceTransform* /* prefix_extractor */,
189 uint64_t block_offset, const bool /*no_io*/,
190 const Slice* const /*const_ikey_ptr*/) {
7c673cae
FG
191 assert(block_offset != kNotValid);
192 if (!whole_key_filtering_) {
193 return true;
194 }
195 return MayMatch(key, block_offset);
196}
197
198bool BlockBasedFilterBlockReader::PrefixMayMatch(
11fdf7f2
TL
199 const Slice& prefix, const SliceTransform* /* prefix_extractor */,
200 uint64_t block_offset, const bool /*no_io*/,
201 const Slice* const /*const_ikey_ptr*/) {
7c673cae 202 assert(block_offset != kNotValid);
7c673cae
FG
203 return MayMatch(prefix, block_offset);
204}
205
206bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
207 uint64_t block_offset) {
208 uint64_t index = block_offset >> base_lg_;
209 if (index < num_) {
210 uint32_t start = DecodeFixed32(offset_ + index * 4);
211 uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
212 if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
213 Slice filter = Slice(data_ + start, limit - start);
214 bool const may_match = policy_->KeyMayMatch(entry, filter);
215 if (may_match) {
216 PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
217 return true;
218 } else {
219 PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
220 return false;
221 }
222 } else if (start == limit) {
223 // Empty filters do not match any entries
224 return false;
225 }
226 }
227 return true; // Errors are treated as potential matches
228}
229
230size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
231 return num_ * 4 + 5 + (offset_ - data_);
232}
233
234std::string BlockBasedFilterBlockReader::ToString() const {
11fdf7f2 235 std::string result;
7c673cae
FG
236 result.reserve(1024);
237
238 std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
239 AppendItem(&result, s_fb, rocksdb::ToString(num_));
240 AppendItem(&result, s_bo, s_hd);
241
242 for (size_t index = 0; index < num_; index++) {
243 uint32_t start = DecodeFixed32(offset_ + index * 4);
244 uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
245
246 if (start != limit) {
247 result.append(" filter block # " + rocksdb::ToString(index + 1) + "\n");
248 Slice filter = Slice(data_ + start, limit - start);
249 AppendItem(&result, start, filter.ToString(true));
250 }
251 }
252 return result;
253}
254} // namespace rocksdb