]> git.proxmox.com Git - ceph.git/blob - ceph/src/osd/HitSet.h
update sources to v12.2.1
[ceph.git] / ceph / src / osd / HitSet.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2013 Inktank <info@inktank.com>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_OSD_HITSET_H
16 #define CEPH_OSD_HITSET_H
17
18 #include <boost/scoped_ptr.hpp>
19
20 #include "include/encoding.h"
21 #include "include/unordered_set.h"
22 #include "common/bloom_filter.hpp"
23 #include "common/hobject.h"
24
25 /**
26 * generic container for a HitSet
27 *
28 * Encapsulate a HitSetImpl of any type. Expose a generic interface
29 * to users and wrap the encoded object with a type so that it can be
30 * safely decoded later.
31 */
32
33 class HitSet {
34 public:
35 typedef enum {
36 TYPE_NONE = 0,
37 TYPE_EXPLICIT_HASH = 1,
38 TYPE_EXPLICIT_OBJECT = 2,
39 TYPE_BLOOM = 3
40 } impl_type_t;
41
42 static const char *get_type_name(impl_type_t t) {
43 switch (t) {
44 case TYPE_NONE: return "none";
45 case TYPE_EXPLICIT_HASH: return "explicit_hash";
46 case TYPE_EXPLICIT_OBJECT: return "explicit_object";
47 case TYPE_BLOOM: return "bloom";
48 default: return "???";
49 }
50 }
51 const char *get_type_name() const {
52 if (impl)
53 return get_type_name(impl->get_type());
54 return get_type_name(TYPE_NONE);
55 }
56
57 /// abstract interface for a HitSet implementation
58 class Impl {
59 public:
60 virtual impl_type_t get_type() const = 0;
61 virtual bool is_full() const = 0;
62 virtual void insert(const hobject_t& o) = 0;
63 virtual bool contains(const hobject_t& o) const = 0;
64 virtual unsigned insert_count() const = 0;
65 virtual unsigned approx_unique_insert_count() const = 0;
66 virtual void encode(bufferlist &bl) const = 0;
67 virtual void decode(bufferlist::iterator& p) = 0;
68 virtual void dump(Formatter *f) const = 0;
69 virtual Impl* clone() const = 0;
70 virtual void seal() {}
71 virtual ~Impl() {}
72 };
73
74 boost::scoped_ptr<Impl> impl;
75 bool sealed;
76
77 class Params {
78 /// create an Impl* of the given type
79 bool create_impl(impl_type_t t);
80
81 public:
82 class Impl {
83 public:
84 virtual impl_type_t get_type() const = 0;
85 virtual HitSet::Impl *get_new_impl() const = 0;
86 virtual void encode(bufferlist &bl) const {}
87 virtual void decode(bufferlist::iterator& p) {}
88 virtual void dump(Formatter *f) const {}
89 virtual void dump_stream(ostream& o) const {}
90 virtual ~Impl() {}
91 };
92
93 Params() {}
94 explicit Params(Impl *i) : impl(i) {}
95 virtual ~Params() {}
96
97 boost::scoped_ptr<Params::Impl> impl;
98
99 impl_type_t get_type() const {
100 if (impl)
101 return impl->get_type();
102 return TYPE_NONE;
103 }
104
105 Params(const Params& o);
106 const Params& operator=(const Params& o);
107
108 void encode(bufferlist &bl) const;
109 void decode(bufferlist::iterator &bl);
110 void dump(Formatter *f) const;
111 static void generate_test_instances(list<HitSet::Params*>& o);
112
113 friend ostream& operator<<(ostream& out, const HitSet::Params& p);
114 };
115
116 HitSet() : impl(NULL), sealed(false) {}
117 explicit HitSet(Impl *i) : impl(i), sealed(false) {}
118 explicit HitSet(const HitSet::Params& params);
119
120 HitSet(const HitSet& o) {
121 sealed = o.sealed;
122 if (o.impl)
123 impl.reset(o.impl->clone());
124 else
125 impl.reset(NULL);
126 }
127 const HitSet& operator=(const HitSet& o) {
128 sealed = o.sealed;
129 if (o.impl)
130 impl.reset(o.impl->clone());
131 else
132 impl.reset(NULL);
133 return *this;
134 }
135
136
137 bool is_full() const {
138 return impl->is_full();
139 }
140 /// insert a hash into the set
141 void insert(const hobject_t& o) {
142 impl->insert(o);
143 }
144 /// query whether a hash is in the set
145 bool contains(const hobject_t& o) const {
146 return impl->contains(o);
147 }
148
149 unsigned insert_count() const {
150 return impl->insert_count();
151 }
152 unsigned approx_unique_insert_count() const {
153 return impl->approx_unique_insert_count();
154 }
155 void seal() {
156 assert(!sealed);
157 sealed = true;
158 impl->seal();
159 }
160
161 void encode(bufferlist &bl) const;
162 void decode(bufferlist::iterator &bl);
163 void dump(Formatter *f) const;
164 static void generate_test_instances(list<HitSet*>& o);
165
166 private:
167 void reset_to_type(impl_type_t type);
168 };
169 WRITE_CLASS_ENCODER(HitSet)
170 WRITE_CLASS_ENCODER(HitSet::Params)
171
172 typedef boost::shared_ptr<HitSet> HitSetRef;
173
174 ostream& operator<<(ostream& out, const HitSet::Params& p);
175
176 /**
177 * explicitly enumerate hash hits in the set
178 */
179 class ExplicitHashHitSet : public HitSet::Impl {
180 uint64_t count;
181 ceph::unordered_set<uint32_t> hits;
182 public:
183 class Params : public HitSet::Params::Impl {
184 public:
185 HitSet::impl_type_t get_type() const override {
186 return HitSet::TYPE_EXPLICIT_HASH;
187 }
188 HitSet::Impl *get_new_impl() const override {
189 return new ExplicitHashHitSet;
190 }
191 static void generate_test_instances(list<Params*>& o) {
192 o.push_back(new Params);
193 }
194 };
195
196 ExplicitHashHitSet() : count(0) {}
197 explicit ExplicitHashHitSet(const ExplicitHashHitSet::Params *p) : count(0) {}
198 ExplicitHashHitSet(const ExplicitHashHitSet &o) : count(o.count),
199 hits(o.hits) {}
200
201 HitSet::Impl *clone() const override {
202 return new ExplicitHashHitSet(*this);
203 }
204
205 HitSet::impl_type_t get_type() const override {
206 return HitSet::TYPE_EXPLICIT_HASH;
207 }
208 bool is_full() const override {
209 return false;
210 }
211 void insert(const hobject_t& o) override {
212 hits.insert(o.get_hash());
213 ++count;
214 }
215 bool contains(const hobject_t& o) const override {
216 return hits.count(o.get_hash());
217 }
218 unsigned insert_count() const override {
219 return count;
220 }
221 unsigned approx_unique_insert_count() const override {
222 return hits.size();
223 }
224 void encode(bufferlist &bl) const override {
225 ENCODE_START(1, 1, bl);
226 ::encode(count, bl);
227 ::encode(hits, bl);
228 ENCODE_FINISH(bl);
229 }
230 void decode(bufferlist::iterator &bl) override {
231 DECODE_START(1, bl);
232 ::decode(count, bl);
233 ::decode(hits, bl);
234 DECODE_FINISH(bl);
235 }
236 void dump(Formatter *f) const override;
237 static void generate_test_instances(list<ExplicitHashHitSet*>& o) {
238 o.push_back(new ExplicitHashHitSet);
239 o.push_back(new ExplicitHashHitSet);
240 o.back()->insert(hobject_t());
241 o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
242 o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
243 }
244 };
245 WRITE_CLASS_ENCODER(ExplicitHashHitSet)
246
247 /**
248 * explicitly enumerate objects in the set
249 */
250 class ExplicitObjectHitSet : public HitSet::Impl {
251 uint64_t count;
252 ceph::unordered_set<hobject_t> hits;
253 public:
254 class Params : public HitSet::Params::Impl {
255 public:
256 HitSet::impl_type_t get_type() const override {
257 return HitSet::TYPE_EXPLICIT_OBJECT;
258 }
259 HitSet::Impl *get_new_impl() const override {
260 return new ExplicitObjectHitSet;
261 }
262 static void generate_test_instances(list<Params*>& o) {
263 o.push_back(new Params);
264 }
265 };
266
267 ExplicitObjectHitSet() : count(0) {}
268 explicit ExplicitObjectHitSet(const ExplicitObjectHitSet::Params *p) : count(0) {}
269 ExplicitObjectHitSet(const ExplicitObjectHitSet &o) : count(o.count),
270 hits(o.hits) {}
271
272 HitSet::Impl *clone() const override {
273 return new ExplicitObjectHitSet(*this);
274 }
275
276 HitSet::impl_type_t get_type() const override {
277 return HitSet::TYPE_EXPLICIT_OBJECT;
278 }
279 bool is_full() const override {
280 return false;
281 }
282 void insert(const hobject_t& o) override {
283 hits.insert(o);
284 ++count;
285 }
286 bool contains(const hobject_t& o) const override {
287 return hits.count(o);
288 }
289 unsigned insert_count() const override {
290 return count;
291 }
292 unsigned approx_unique_insert_count() const override {
293 return hits.size();
294 }
295 void encode(bufferlist &bl) const override {
296 ENCODE_START(1, 1, bl);
297 ::encode(count, bl);
298 ::encode(hits, bl);
299 ENCODE_FINISH(bl);
300 }
301 void decode(bufferlist::iterator &bl) override {
302 DECODE_START(1, bl);
303 ::decode(count, bl);
304 ::decode(hits, bl);
305 DECODE_FINISH(bl);
306 }
307 void dump(Formatter *f) const override;
308 static void generate_test_instances(list<ExplicitObjectHitSet*>& o) {
309 o.push_back(new ExplicitObjectHitSet);
310 o.push_back(new ExplicitObjectHitSet);
311 o.back()->insert(hobject_t());
312 o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
313 o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
314 }
315 };
316 WRITE_CLASS_ENCODER(ExplicitObjectHitSet)
317
318 /**
319 * use a bloom_filter to track hits to the set
320 */
321 class BloomHitSet : public HitSet::Impl {
322 compressible_bloom_filter bloom;
323
324 public:
325 HitSet::impl_type_t get_type() const override {
326 return HitSet::TYPE_BLOOM;
327 }
328
329 class Params : public HitSet::Params::Impl {
330 public:
331 HitSet::impl_type_t get_type() const override {
332 return HitSet::TYPE_BLOOM;
333 }
334 HitSet::Impl *get_new_impl() const override {
335 return new BloomHitSet;
336 }
337
338 uint32_t fpp_micro; ///< false positive probability / 1M
339 uint64_t target_size; ///< number of unique insertions we expect to this HitSet
340 uint64_t seed; ///< seed to use when initializing the bloom filter
341
342 Params()
343 : fpp_micro(0), target_size(0), seed(0) {}
344 Params(double fpp, uint64_t t, uint64_t s)
345 : fpp_micro(fpp * 1000000.0), target_size(t), seed(s) {}
346 Params(const Params &o)
347 : fpp_micro(o.fpp_micro),
348 target_size(o.target_size),
349 seed(o.seed) {}
350 ~Params() override {}
351
352 double get_fpp() const {
353 return (double)fpp_micro / 1000000.0;
354 }
355 void set_fpp(double f) {
356 fpp_micro = (unsigned)(llrintl(f * (double)1000000.0));
357 }
358
359 void encode(bufferlist& bl) const override {
360 ENCODE_START(1, 1, bl);
361 ::encode(fpp_micro, bl);
362 ::encode(target_size, bl);
363 ::encode(seed, bl);
364 ENCODE_FINISH(bl);
365 }
366 void decode(bufferlist::iterator& bl) override {
367 DECODE_START(1, bl);
368 ::decode(fpp_micro, bl);
369 ::decode(target_size, bl);
370 ::decode(seed, bl);
371 DECODE_FINISH(bl);
372 }
373 void dump(Formatter *f) const override;
374 void dump_stream(ostream& o) const override {
375 o << "false_positive_probability: "
376 << get_fpp() << ", target_size: " << target_size
377 << ", seed: " << seed;
378 }
379 static void generate_test_instances(list<Params*>& o) {
380 o.push_back(new Params);
381 o.push_back(new Params);
382 (*o.rbegin())->fpp_micro = 123456;
383 (*o.rbegin())->target_size = 300;
384 (*o.rbegin())->seed = 99;
385 }
386 };
387
388 BloomHitSet() {}
389 BloomHitSet(unsigned inserts, double fpp, int seed)
390 : bloom(inserts, fpp, seed)
391 {}
392 explicit BloomHitSet(const BloomHitSet::Params *p) : bloom(p->target_size,
393 p->get_fpp(),
394 p->seed)
395 {}
396
397 BloomHitSet(const BloomHitSet &o) {
398 // oh god
399 bufferlist bl;
400 o.encode(bl);
401 bufferlist::iterator bli = bl.begin();
402 this->decode(bli);
403 }
404
405 HitSet::Impl *clone() const override {
406 return new BloomHitSet(*this);
407 }
408
409 bool is_full() const override {
410 return bloom.is_full();
411 }
412
413 void insert(const hobject_t& o) override {
414 bloom.insert(o.get_hash());
415 }
416 bool contains(const hobject_t& o) const override {
417 return bloom.contains(o.get_hash());
418 }
419 unsigned insert_count() const override {
420 return bloom.element_count();
421 }
422 unsigned approx_unique_insert_count() const override {
423 return bloom.approx_unique_element_count();
424 }
425 void seal() override {
426 // aim for a density of .5 (50% of bit set)
427 double pc = (double)bloom.density() * 2.0;
428 if (pc < 1.0)
429 bloom.compress(pc);
430 }
431
432 void encode(bufferlist &bl) const override {
433 ENCODE_START(1, 1, bl);
434 ::encode(bloom, bl);
435 ENCODE_FINISH(bl);
436 }
437 void decode(bufferlist::iterator &bl) override {
438 DECODE_START(1, bl);
439 ::decode(bloom, bl);
440 DECODE_FINISH(bl);
441 }
442 void dump(Formatter *f) const override;
443 static void generate_test_instances(list<BloomHitSet*>& o) {
444 o.push_back(new BloomHitSet);
445 o.push_back(new BloomHitSet(10, .1, 1));
446 o.back()->insert(hobject_t());
447 o.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP, 123, 1, ""));
448 o.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP, 456, 1, ""));
449 }
450 };
451 WRITE_CLASS_ENCODER(BloomHitSet)
452
453 #endif