]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/HitSet.h
import ceph quincy 17.2.1
[ceph.git] / ceph / src / osd / HitSet.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank <info@inktank.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_OSD_HITSET_H
16 #define CEPH_OSD_HITSET_H
17
18 #include <string_view>
19
20 #include <boost/scoped_ptr.hpp>
21
22 #include "include/encoding.h"
23 #include "include/unordered_set.h"
24 #include "common/bloom_filter.hpp"
25 #include "common/hobject.h"
26
27 /**
28 * generic container for a HitSet
29 *
30 * Encapsulate a HitSetImpl of any type. Expose a generic interface
31 * to users and wrap the encoded object with a type so that it can be
32 * safely decoded later.
33 */
34
35 class HitSet {
36 public:
37 typedef enum {
38 TYPE_NONE = 0,
39 TYPE_EXPLICIT_HASH = 1,
40 TYPE_EXPLICIT_OBJECT = 2,
41 TYPE_BLOOM = 3
42 } impl_type_t;
43
44 static std::string_view get_type_name(impl_type_t t) {
45 switch (t) {
46 case TYPE_NONE: return "none";
47 case TYPE_EXPLICIT_HASH: return "explicit_hash";
48 case TYPE_EXPLICIT_OBJECT: return "explicit_object";
49 case TYPE_BLOOM: return "bloom";
50 default: return "???";
51 }
52 }
53 std::string_view get_type_name() const {
54 if (impl)
55 return get_type_name(impl->get_type());
56 return get_type_name(TYPE_NONE);
57 }
58
59 /// abstract interface for a HitSet implementation
60 class Impl {
61 public:
62 virtual impl_type_t get_type() const = 0;
63 virtual bool is_full() const = 0;
64 virtual void insert(const hobject_t& o) = 0;
65 virtual bool contains(const hobject_t& o) const = 0;
66 virtual unsigned insert_count() const = 0;
67 virtual unsigned approx_unique_insert_count() const = 0;
68 virtual void encode(ceph::buffer::list &bl) const = 0;
69 virtual void decode(ceph::buffer::list::const_iterator& p) = 0;
70 virtual void dump(ceph::Formatter *f) const = 0;
71 virtual Impl* clone() const = 0;
72 virtual void seal() {}
73 virtual ~Impl() {}
74 };
75
76 boost::scoped_ptr<Impl> impl;
77 bool sealed;
78
79 class Params {
80 /// create an Impl* of the given type
81 bool create_impl(impl_type_t t);
82
83 public:
84 class Impl {
85 public:
86 virtual impl_type_t get_type() const = 0;
87 virtual HitSet::Impl *get_new_impl() const = 0;
88 virtual void encode(ceph::buffer::list &bl) const {}
89 virtual void decode(ceph::buffer::list::const_iterator& p) {}
90 virtual void dump(ceph::Formatter *f) const {}
91 virtual void dump_stream(std::ostream& o) const {}
92 virtual ~Impl() {}
93 };
94
95 Params() {}
96 explicit Params(Impl *i) : impl(i) {}
97 virtual ~Params() {}
98
99 boost::scoped_ptr<Params::Impl> impl;
100
101 impl_type_t get_type() const {
102 if (impl)
103 return impl->get_type();
104 return TYPE_NONE;
105 }
106
107 Params(const Params& o) noexcept;
108 const Params& operator=(const Params& o);
109
110 void encode(ceph::buffer::list &bl) const;
111 void decode(ceph::buffer::list::const_iterator& bl);
112 void dump(ceph::Formatter *f) const;
113 static void generate_test_instances(std::list<HitSet::Params*>& o);
114
115 friend std::ostream& operator<<(std::ostream& out, const HitSet::Params& p);
116 };
117
118 HitSet() : impl(NULL), sealed(false) {}
119 explicit HitSet(Impl *i) : impl(i), sealed(false) {}
120 explicit HitSet(const HitSet::Params& params);
121
122 HitSet(const HitSet& o) {
123 sealed = o.sealed;
124 if (o.impl)
125 impl.reset(o.impl->clone());
126 else
127 impl.reset(NULL);
128 }
129 const HitSet& operator=(const HitSet& o) {
130 sealed = o.sealed;
131 if (o.impl)
132 impl.reset(o.impl->clone());
133 else
134 impl.reset(NULL);
135 return *this;
136 }
137
138
139 bool is_full() const {
140 return impl->is_full();
141 }
142 /// insert a hash into the set
143 void insert(const hobject_t& o) {
144 impl->insert(o);
145 }
146 /// query whether a hash is in the set
147 bool contains(const hobject_t& o) const {
148 return impl->contains(o);
149 }
150
151 unsigned insert_count() const {
152 return impl->insert_count();
153 }
154 unsigned approx_unique_insert_count() const {
155 return impl->approx_unique_insert_count();
156 }
157 void seal() {
158 ceph_assert(!sealed);
159 sealed = true;
160 impl->seal();
161 }
162
163 void encode(ceph::buffer::list &bl) const;
164 void decode(ceph::buffer::list::const_iterator& bl);
165 void dump(ceph::Formatter *f) const;
166 static void generate_test_instances(std::list<HitSet*>& o);
167
168 private:
169 void reset_to_type(impl_type_t type);
170 };
171 WRITE_CLASS_ENCODER(HitSet)
172 WRITE_CLASS_ENCODER(HitSet::Params)
173
174 typedef boost::shared_ptr<HitSet> HitSetRef;
175
176 std::ostream& operator<<(std::ostream& out, const HitSet::Params& p);
177
178 /**
179 * explicitly enumerate hash hits in the set
180 */
181 class ExplicitHashHitSet : public HitSet::Impl {
182 uint64_t count;
183 ceph::unordered_set<uint32_t> hits;
184 public:
185 class Params : public HitSet::Params::Impl {
186 public:
187 HitSet::impl_type_t get_type() const override {
188 return HitSet::TYPE_EXPLICIT_HASH;
189 }
190 HitSet::Impl *get_new_impl() const override {
191 return new ExplicitHashHitSet;
192 }
193 static void generate_test_instances(std::list<Params*>& o) {
194 o.push_back(new Params);
195 }
196 };
197
198 ExplicitHashHitSet() : count(0) {}
199 explicit ExplicitHashHitSet(const ExplicitHashHitSet::Params *p) : count(0) {}
200 ExplicitHashHitSet(const ExplicitHashHitSet &o) : count(o.count),
201 hits(o.hits) {}
202
203 HitSet::Impl *clone() const override {
204 return new ExplicitHashHitSet(*this);
205 }
206
207 HitSet::impl_type_t get_type() const override {
208 return HitSet::TYPE_EXPLICIT_HASH;
209 }
210 bool is_full() const override {
211 return false;
212 }
213 void insert(const hobject_t& o) override {
214 hits.insert(o.get_hash());
215 ++count;
216 }
217 bool contains(const hobject_t& o) const override {
218 return hits.count(o.get_hash());
219 }
220 unsigned insert_count() const override {
221 return count;
222 }
223 unsigned approx_unique_insert_count() const override {
224 return hits.size();
225 }
226 void encode(ceph::buffer::list &bl) const override {
227 ENCODE_START(1, 1, bl);
228 encode(count, bl);
229 encode(hits, bl);
230 ENCODE_FINISH(bl);
231 }
232 void decode(ceph::buffer::list::const_iterator &bl) override {
233 DECODE_START(1, bl);
234 decode(count, bl);
235 decode(hits, bl);
236 DECODE_FINISH(bl);
237 }
238 void dump(ceph::Formatter *f) const override;
239 static void generate_test_instances(std::list<ExplicitHashHitSet*>& o) {
240 o.push_back(new ExplicitHashHitSet);
241 o.push_back(new ExplicitHashHitSet);
242 o.back()->insert(hobject_t());
243 o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
244 o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
245 }
246 };
247 WRITE_CLASS_ENCODER(ExplicitHashHitSet)
248
249 /**
250 * explicitly enumerate objects in the set
251 */
252 class ExplicitObjectHitSet : public HitSet::Impl {
253 uint64_t count;
254 ceph::unordered_set<hobject_t> hits;
255 public:
256 class Params : public HitSet::Params::Impl {
257 public:
258 HitSet::impl_type_t get_type() const override {
259 return HitSet::TYPE_EXPLICIT_OBJECT;
260 }
261 HitSet::Impl *get_new_impl() const override {
262 return new ExplicitObjectHitSet;
263 }
264 static void generate_test_instances(std::list<Params*>& o) {
265 o.push_back(new Params);
266 }
267 };
268
269 ExplicitObjectHitSet() : count(0) {}
270 explicit ExplicitObjectHitSet(const ExplicitObjectHitSet::Params *p) : count(0) {}
271 ExplicitObjectHitSet(const ExplicitObjectHitSet &o) : count(o.count),
272 hits(o.hits) {}
273
274 HitSet::Impl *clone() const override {
275 return new ExplicitObjectHitSet(*this);
276 }
277
278 HitSet::impl_type_t get_type() const override {
279 return HitSet::TYPE_EXPLICIT_OBJECT;
280 }
281 bool is_full() const override {
282 return false;
283 }
284 void insert(const hobject_t& o) override {
285 hits.insert(o);
286 ++count;
287 }
288 bool contains(const hobject_t& o) const override {
289 return hits.count(o);
290 }
291 unsigned insert_count() const override {
292 return count;
293 }
294 unsigned approx_unique_insert_count() const override {
295 return hits.size();
296 }
297 void encode(ceph::buffer::list &bl) const override {
298 ENCODE_START(1, 1, bl);
299 encode(count, bl);
300 encode(hits, bl);
301 ENCODE_FINISH(bl);
302 }
303 void decode(ceph::buffer::list::const_iterator& bl) override {
304 DECODE_START(1, bl);
305 decode(count, bl);
306 decode(hits, bl);
307 DECODE_FINISH(bl);
308 }
309 void dump(ceph::Formatter *f) const override;
310 static void generate_test_instances(std::list<ExplicitObjectHitSet*>& o) {
311 o.push_back(new ExplicitObjectHitSet);
312 o.push_back(new ExplicitObjectHitSet);
313 o.back()->insert(hobject_t());
314 o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
315 o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
316 }
317 };
318 WRITE_CLASS_ENCODER(ExplicitObjectHitSet)
319
320 /**
321 * use a bloom_filter to track hits to the set
322 */
323 class BloomHitSet : public HitSet::Impl {
324 compressible_bloom_filter bloom;
325
326 public:
327 HitSet::impl_type_t get_type() const override {
328 return HitSet::TYPE_BLOOM;
329 }
330
331 class Params : public HitSet::Params::Impl {
332 public:
333 HitSet::impl_type_t get_type() const override {
334 return HitSet::TYPE_BLOOM;
335 }
336 HitSet::Impl *get_new_impl() const override {
337 return new BloomHitSet;
338 }
339
340 uint32_t fpp_micro; ///< false positive probability / 1M
341 uint64_t target_size; ///< number of unique insertions we expect to this HitSet
342 uint64_t seed; ///< seed to use when initializing the bloom filter
343
344 Params()
345 : fpp_micro(0), target_size(0), seed(0) {}
346 Params(double fpp, uint64_t t, uint64_t s)
347 : fpp_micro(fpp * 1000000.0), target_size(t), seed(s) {}
348 Params(const Params &o)
349 : fpp_micro(o.fpp_micro),
350 target_size(o.target_size),
351 seed(o.seed) {}
352 ~Params() override {}
353
354 double get_fpp() const {
355 return (double)fpp_micro / 1000000.0;
356 }
357 void set_fpp(double f) {
358 fpp_micro = (unsigned)(llrintl(f * 1000000.0));
359 }
360
361 void encode(ceph::buffer::list& bl) const override {
362 ENCODE_START(1, 1, bl);
363 encode(fpp_micro, bl);
364 encode(target_size, bl);
365 encode(seed, bl);
366 ENCODE_FINISH(bl);
367 }
368 void decode(ceph::buffer::list::const_iterator& bl) override {
369 DECODE_START(1, bl);
370 decode(fpp_micro, bl);
371 decode(target_size, bl);
372 decode(seed, bl);
373 DECODE_FINISH(bl);
374 }
375 void dump(ceph::Formatter *f) const override;
376 void dump_stream(std::ostream& o) const override {
377 o << "false_positive_probability: "
378 << get_fpp() << ", target_size: " << target_size
379 << ", seed: " << seed;
380 }
381 static void generate_test_instances(std::list<Params*>& o) {
382 o.push_back(new Params);
383 o.push_back(new Params);
384 (*o.rbegin())->fpp_micro = 123456;
385 (*o.rbegin())->target_size = 300;
386 (*o.rbegin())->seed = 99;
387 }
388 };
389
390 BloomHitSet() {}
391 BloomHitSet(unsigned inserts, double fpp, int seed)
392 : bloom(inserts, fpp, seed)
393 {}
394 explicit BloomHitSet(const BloomHitSet::Params *p) : bloom(p->target_size,
395 p->get_fpp(),
396 p->seed)
397 {}
398
399 BloomHitSet(const BloomHitSet &o) {
400 // oh god
401 ceph::buffer::list bl;
402 o.encode(bl);
403 auto bli = std::cbegin(bl);
404 this->decode(bli);
405 }
406
407 HitSet::Impl *clone() const override {
408 return new BloomHitSet(*this);
409 }
410
411 bool is_full() const override {
412 return bloom.is_full();
413 }
414
415 void insert(const hobject_t& o) override {
416 bloom.insert(o.get_hash());
417 }
418 bool contains(const hobject_t& o) const override {
419 return bloom.contains(o.get_hash());
420 }
421 unsigned insert_count() const override {
422 return bloom.element_count();
423 }
424 unsigned approx_unique_insert_count() const override {
425 return bloom.approx_unique_element_count();
426 }
427 void seal() override {
428 // aim for a density of .5 (50% of bit set)
429 double pc = bloom.density() * 2.0;
430 if (pc < 1.0)
431 bloom.compress(pc);
432 }
433
434 void encode(ceph::buffer::list &bl) const override {
435 ENCODE_START(1, 1, bl);
436 encode(bloom, bl);
437 ENCODE_FINISH(bl);
438 }
439 void decode(ceph::buffer::list::const_iterator& bl) override {
440 DECODE_START(1, bl);
441 decode(bloom, bl);
442 DECODE_FINISH(bl);
443 }
444 void dump(ceph::Formatter *f) const override;
445 static void generate_test_instances(std::list<BloomHitSet*>& o) {
446 o.push_back(new BloomHitSet);
447 o.push_back(new BloomHitSet(10, .1, 1));
448 o.back()->insert(hobject_t());
449 o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
450 o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
451 }
452 };
453 WRITE_CLASS_ENCODER(BloomHitSet)
454
455 #endif