]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/block_based_filter_block.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / table / block_based_filter_block.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#include "table/block_based_filter_block.h"
11#include <algorithm>
12
13#include "db/dbformat.h"
14#include "monitoring/perf_context_imp.h"
15#include "rocksdb/filter_policy.h"
16#include "util/coding.h"
17#include "util/string_util.h"
18
19namespace rocksdb {
20
21namespace {
22
23void AppendItem(std::string* props, const std::string& key,
24 const std::string& value) {
25 char cspace = ' ';
26 std::string value_str("");
27 size_t i = 0;
28 const size_t dataLength = 64;
29 const size_t tabLength = 2;
30 const size_t offLength = 16;
31
32 value_str.append(&value[i], std::min(size_t(dataLength), value.size()));
33 i += dataLength;
34 while (i < value.size()) {
35 value_str.append("\n");
36 value_str.append(offLength, cspace);
37 value_str.append(&value[i], std::min(size_t(dataLength), value.size() - i));
38 i += dataLength;
39 }
40
41 std::string result("");
42 if (key.size() < (offLength - tabLength))
43 result.append(size_t((offLength - tabLength)) - key.size(), cspace);
44 result.append(key);
45
46 props->append(result + ": " + value_str + "\n");
47}
48
49template <class TKey>
50void AppendItem(std::string* props, const TKey& key, const std::string& value) {
51 std::string key_str = rocksdb::ToString(key);
52 AppendItem(props, key_str, value);
53}
54} // namespace
55
56
57// See doc/table_format.txt for an explanation of the filter block format.
58
59// Generate new filter every 2KB of data
60static const size_t kFilterBaseLg = 11;
61static const size_t kFilterBase = 1 << kFilterBaseLg;
62
63BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
64 const SliceTransform* prefix_extractor,
65 const BlockBasedTableOptions& table_opt)
66 : policy_(table_opt.filter_policy.get()),
67 prefix_extractor_(prefix_extractor),
68 whole_key_filtering_(table_opt.whole_key_filtering),
69 prev_prefix_start_(0),
11fdf7f2
TL
70 prev_prefix_size_(0),
71 num_added_(0) {
7c673cae
FG
72 assert(policy_);
73}
74
75void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset) {
76 uint64_t filter_index = (block_offset / kFilterBase);
77 assert(filter_index >= filter_offsets_.size());
78 while (filter_index > filter_offsets_.size()) {
79 GenerateFilter();
80 }
81}
82
83void BlockBasedFilterBlockBuilder::Add(const Slice& key) {
84 if (prefix_extractor_ && prefix_extractor_->InDomain(key)) {
85 AddPrefix(key);
86 }
87
88 if (whole_key_filtering_) {
89 AddKey(key);
90 }
91}
92
93// Add key to filter if needed
94inline void BlockBasedFilterBlockBuilder::AddKey(const Slice& key) {
11fdf7f2 95 num_added_++;
7c673cae
FG
96 start_.push_back(entries_.size());
97 entries_.append(key.data(), key.size());
98}
99
100// Add prefix to filter if needed
101inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice& key) {
102 // get slice for most recently added entry
103 Slice prev;
104 if (prev_prefix_size_ > 0) {
105 prev = Slice(entries_.data() + prev_prefix_start_, prev_prefix_size_);
106 }
107
108 Slice prefix = prefix_extractor_->Transform(key);
109 // insert prefix only when it's different from the previous prefix.
110 if (prev.size() == 0 || prefix != prev) {
7c673cae
FG
111 prev_prefix_start_ = entries_.size();
112 prev_prefix_size_ = prefix.size();
11fdf7f2 113 AddKey(prefix);
7c673cae
FG
114 }
115}
116
11fdf7f2 117Slice BlockBasedFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
7c673cae
FG
118 Status* status) {
119 // In this impl we ignore BlockHandle
120 *status = Status::OK();
121 if (!start_.empty()) {
122 GenerateFilter();
123 }
124
125 // Append array of per-filter offsets
126 const uint32_t array_offset = static_cast<uint32_t>(result_.size());
127 for (size_t i = 0; i < filter_offsets_.size(); i++) {
128 PutFixed32(&result_, filter_offsets_[i]);
129 }
130
131 PutFixed32(&result_, array_offset);
132 result_.push_back(kFilterBaseLg); // Save encoding parameter in result
133 return Slice(result_);
134}
135
136void BlockBasedFilterBlockBuilder::GenerateFilter() {
137 const size_t num_entries = start_.size();
138 if (num_entries == 0) {
139 // Fast path if there are no keys for this filter
140 filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
141 return;
142 }
143
144 // Make list of keys from flattened key structure
145 start_.push_back(entries_.size()); // Simplify length computation
146 tmp_entries_.resize(num_entries);
147 for (size_t i = 0; i < num_entries; i++) {
148 const char* base = entries_.data() + start_[i];
149 size_t length = start_[i + 1] - start_[i];
150 tmp_entries_[i] = Slice(base, length);
151 }
152
153 // Generate filter for current set of keys and append to result_.
154 filter_offsets_.push_back(static_cast<uint32_t>(result_.size()));
155 policy_->CreateFilter(&tmp_entries_[0], static_cast<int>(num_entries),
156 &result_);
157
158 tmp_entries_.clear();
159 entries_.clear();
160 start_.clear();
161 prev_prefix_start_ = 0;
162 prev_prefix_size_ = 0;
163}
164
165BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
166 const SliceTransform* prefix_extractor,
167 const BlockBasedTableOptions& table_opt, bool _whole_key_filtering,
168 BlockContents&& contents, Statistics* stats)
169 : FilterBlockReader(contents.data.size(), stats, _whole_key_filtering),
170 policy_(table_opt.filter_policy.get()),
171 prefix_extractor_(prefix_extractor),
172 data_(nullptr),
173 offset_(nullptr),
174 num_(0),
175 base_lg_(0),
176 contents_(std::move(contents)) {
177 assert(policy_);
178 size_t n = contents_.data.size();
179 if (n < 5) return; // 1 byte for base_lg_ and 4 for start of offset array
180 base_lg_ = contents_.data[n - 1];
181 uint32_t last_word = DecodeFixed32(contents_.data.data() + n - 5);
182 if (last_word > n - 5) return;
183 data_ = contents_.data.data();
184 offset_ = data_ + last_word;
185 num_ = (n - 5 - last_word) / 4;
186}
187
188bool BlockBasedFilterBlockReader::KeyMayMatch(
11fdf7f2
TL
189 const Slice& key, const SliceTransform* /* prefix_extractor */,
190 uint64_t block_offset, const bool /*no_io*/,
191 const Slice* const /*const_ikey_ptr*/) {
7c673cae
FG
192 assert(block_offset != kNotValid);
193 if (!whole_key_filtering_) {
194 return true;
195 }
196 return MayMatch(key, block_offset);
197}
198
199bool BlockBasedFilterBlockReader::PrefixMayMatch(
11fdf7f2
TL
200 const Slice& prefix, const SliceTransform* /* prefix_extractor */,
201 uint64_t block_offset, const bool /*no_io*/,
202 const Slice* const /*const_ikey_ptr*/) {
7c673cae 203 assert(block_offset != kNotValid);
7c673cae
FG
204 return MayMatch(prefix, block_offset);
205}
206
207bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
208 uint64_t block_offset) {
209 uint64_t index = block_offset >> base_lg_;
210 if (index < num_) {
211 uint32_t start = DecodeFixed32(offset_ + index * 4);
212 uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
213 if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
214 Slice filter = Slice(data_ + start, limit - start);
215 bool const may_match = policy_->KeyMayMatch(entry, filter);
216 if (may_match) {
217 PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
218 return true;
219 } else {
220 PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
221 return false;
222 }
223 } else if (start == limit) {
224 // Empty filters do not match any entries
225 return false;
226 }
227 }
228 return true; // Errors are treated as potential matches
229}
230
231size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
232 return num_ * 4 + 5 + (offset_ - data_);
233}
234
235std::string BlockBasedFilterBlockReader::ToString() const {
11fdf7f2 236 std::string result;
7c673cae
FG
237 result.reserve(1024);
238
239 std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
240 AppendItem(&result, s_fb, rocksdb::ToString(num_));
241 AppendItem(&result, s_bo, s_hd);
242
243 for (size_t index = 0; index < num_; index++) {
244 uint32_t start = DecodeFixed32(offset_ + index * 4);
245 uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
246
247 if (start != limit) {
248 result.append(" filter block # " + rocksdb::ToString(index + 1) + "\n");
249 Slice filter = Slice(data_ + start, limit - start);
250 AppendItem(&result, start, filter.ToString(true));
251 }
252 }
253 return result;
254}
255} // namespace rocksdb