]>
git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/table/block_based_filter_block.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
10 #include "table/block_based_filter_block.h"
13 #include "db/dbformat.h"
14 #include "monitoring/perf_context_imp.h"
15 #include "rocksdb/filter_policy.h"
16 #include "util/coding.h"
17 #include "util/string_util.h"
// Appends one "key: value" report line to *props, right-aligning short keys
// in a 14-column field and wrapping the value into 64-byte lines, with each
// continuation line indented by a 16-column gutter.
void AppendItem(std::string* props, const std::string& key,
                const std::string& value) {
  const char kPad = ' ';
  const size_t kDataLength = 64;  // value bytes per output line
  const size_t kTabLength = 2;
  const size_t kOffLength = 16;   // gutter width for continuation lines

  // Wrap the value: first chunk flush, later chunks behind a gutter.
  std::string wrapped;
  wrapped.append(&value[0], std::min(kDataLength, value.size()));
  for (size_t pos = kDataLength; pos < value.size(); pos += kDataLength) {
    wrapped.append("\n");
    wrapped.append(kOffLength, kPad);
    wrapped.append(&value[pos], std::min(kDataLength, value.size() - pos));
  }

  // Right-align the key within (kOffLength - kTabLength) columns; keys that
  // are already wider are emitted unpadded.
  std::string header;
  if (key.size() < (kOffLength - kTabLength)) {
    header.assign((kOffLength - kTabLength) - key.size(), kPad);
  }
  header.append(key);

  props->append(header + ": " + wrapped + "\n");
}
50 void AppendItem(std::string
* props
, const TKey
& key
, const std::string
& value
) {
51 std::string key_str
= rocksdb::ToString(key
);
52 AppendItem(props
, key_str
, value
);
56 // See doc/table_format.txt for an explanation of the filter block format.
// Generate new filter every 2KB of data
// (kFilterBase = 1 << kFilterBaseLg = 2048; kFilterBaseLg is also stored as
// the last byte of the finished filter block so the reader can recover the
// per-filter granularity — see Finish()/the reader constructor.)
static const size_t kFilterBaseLg = 11;
static const size_t kFilterBase = 1 << kFilterBaseLg;
62 BlockBasedFilterBlockBuilder::BlockBasedFilterBlockBuilder(
63 const SliceTransform
* prefix_extractor
,
64 const BlockBasedTableOptions
& table_opt
)
65 : policy_(table_opt
.filter_policy
.get()),
66 prefix_extractor_(prefix_extractor
),
67 whole_key_filtering_(table_opt
.whole_key_filtering
),
68 prev_prefix_start_(0),
74 void BlockBasedFilterBlockBuilder::StartBlock(uint64_t block_offset
) {
75 uint64_t filter_index
= (block_offset
/ kFilterBase
);
76 assert(filter_index
>= filter_offsets_
.size());
77 while (filter_index
> filter_offsets_
.size()) {
82 void BlockBasedFilterBlockBuilder::Add(const Slice
& key
) {
83 if (prefix_extractor_
&& prefix_extractor_
->InDomain(key
)) {
87 if (whole_key_filtering_
) {
92 // Add key to filter if needed
93 inline void BlockBasedFilterBlockBuilder::AddKey(const Slice
& key
) {
95 start_
.push_back(entries_
.size());
96 entries_
.append(key
.data(), key
.size());
99 // Add prefix to filter if needed
100 inline void BlockBasedFilterBlockBuilder::AddPrefix(const Slice
& key
) {
101 // get slice for most recently added entry
103 if (prev_prefix_size_
> 0) {
104 prev
= Slice(entries_
.data() + prev_prefix_start_
, prev_prefix_size_
);
107 Slice prefix
= prefix_extractor_
->Transform(key
);
108 // insert prefix only when it's different from the previous prefix.
109 if (prev
.size() == 0 || prefix
!= prev
) {
110 prev_prefix_start_
= entries_
.size();
111 prev_prefix_size_
= prefix
.size();
116 Slice
BlockBasedFilterBlockBuilder::Finish(const BlockHandle
& /*tmp*/,
118 // In this impl we ignore BlockHandle
119 *status
= Status::OK();
120 if (!start_
.empty()) {
124 // Append array of per-filter offsets
125 const uint32_t array_offset
= static_cast<uint32_t>(result_
.size());
126 for (size_t i
= 0; i
< filter_offsets_
.size(); i
++) {
127 PutFixed32(&result_
, filter_offsets_
[i
]);
130 PutFixed32(&result_
, array_offset
);
131 result_
.push_back(kFilterBaseLg
); // Save encoding parameter in result
132 return Slice(result_
);
135 void BlockBasedFilterBlockBuilder::GenerateFilter() {
136 const size_t num_entries
= start_
.size();
137 if (num_entries
== 0) {
138 // Fast path if there are no keys for this filter
139 filter_offsets_
.push_back(static_cast<uint32_t>(result_
.size()));
143 // Make list of keys from flattened key structure
144 start_
.push_back(entries_
.size()); // Simplify length computation
145 tmp_entries_
.resize(num_entries
);
146 for (size_t i
= 0; i
< num_entries
; i
++) {
147 const char* base
= entries_
.data() + start_
[i
];
148 size_t length
= start_
[i
+ 1] - start_
[i
];
149 tmp_entries_
[i
] = Slice(base
, length
);
152 // Generate filter for current set of keys and append to result_.
153 filter_offsets_
.push_back(static_cast<uint32_t>(result_
.size()));
154 policy_
->CreateFilter(&tmp_entries_
[0], static_cast<int>(num_entries
),
157 tmp_entries_
.clear();
160 prev_prefix_start_
= 0;
161 prev_prefix_size_
= 0;
164 BlockBasedFilterBlockReader::BlockBasedFilterBlockReader(
165 const SliceTransform
* prefix_extractor
,
166 const BlockBasedTableOptions
& table_opt
, bool _whole_key_filtering
,
167 BlockContents
&& contents
, Statistics
* stats
)
168 : FilterBlockReader(contents
.data
.size(), stats
, _whole_key_filtering
),
169 policy_(table_opt
.filter_policy
.get()),
170 prefix_extractor_(prefix_extractor
),
175 contents_(std::move(contents
)) {
177 size_t n
= contents_
.data
.size();
178 if (n
< 5) return; // 1 byte for base_lg_ and 4 for start of offset array
179 base_lg_
= contents_
.data
[n
- 1];
180 uint32_t last_word
= DecodeFixed32(contents_
.data
.data() + n
- 5);
181 if (last_word
> n
- 5) return;
182 data_
= contents_
.data
.data();
183 offset_
= data_
+ last_word
;
184 num_
= (n
- 5 - last_word
) / 4;
187 bool BlockBasedFilterBlockReader::KeyMayMatch(
188 const Slice
& key
, const SliceTransform
* /* prefix_extractor */,
189 uint64_t block_offset
, const bool /*no_io*/,
190 const Slice
* const /*const_ikey_ptr*/) {
191 assert(block_offset
!= kNotValid
);
192 if (!whole_key_filtering_
) {
195 return MayMatch(key
, block_offset
);
198 bool BlockBasedFilterBlockReader::PrefixMayMatch(
199 const Slice
& prefix
, const SliceTransform
* /* prefix_extractor */,
200 uint64_t block_offset
, const bool /*no_io*/,
201 const Slice
* const /*const_ikey_ptr*/) {
202 assert(block_offset
!= kNotValid
);
203 return MayMatch(prefix
, block_offset
);
206 bool BlockBasedFilterBlockReader::MayMatch(const Slice
& entry
,
207 uint64_t block_offset
) {
208 uint64_t index
= block_offset
>> base_lg_
;
210 uint32_t start
= DecodeFixed32(offset_
+ index
* 4);
211 uint32_t limit
= DecodeFixed32(offset_
+ index
* 4 + 4);
212 if (start
<= limit
&& limit
<= (uint32_t)(offset_
- data_
)) {
213 Slice filter
= Slice(data_
+ start
, limit
- start
);
214 bool const may_match
= policy_
->KeyMayMatch(entry
, filter
);
216 PERF_COUNTER_ADD(bloom_sst_hit_count
, 1);
219 PERF_COUNTER_ADD(bloom_sst_miss_count
, 1);
222 } else if (start
== limit
) {
223 // Empty filters do not match any entries
227 return true; // Errors are treated as potential matches
230 size_t BlockBasedFilterBlockReader::ApproximateMemoryUsage() const {
231 return num_
* 4 + 5 + (offset_
- data_
);
234 std::string
BlockBasedFilterBlockReader::ToString() const {
236 result
.reserve(1024);
238 std::string
s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
239 AppendItem(&result
, s_fb
, rocksdb::ToString(num_
));
240 AppendItem(&result
, s_bo
, s_hd
);
242 for (size_t index
= 0; index
< num_
; index
++) {
243 uint32_t start
= DecodeFixed32(offset_
+ index
* 4);
244 uint32_t limit
= DecodeFixed32(offset_
+ index
* 4 + 4);
246 if (start
!= limit
) {
247 result
.append(" filter block # " + rocksdb::ToString(index
+ 1) + "\n");
248 Slice filter
= Slice(data_
+ start
, limit
- start
);
249 AppendItem(&result
, start
, filter
.ToString(true));
254 } // namespace rocksdb