1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2013 Inktank <info@inktank.com>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
15 #ifndef CEPH_OSD_HITSET_H
16 #define CEPH_OSD_HITSET_H
18 #include <string_view>
20 #include <boost/scoped_ptr.hpp>
22 #include "include/encoding.h"
23 #include "include/unordered_set.h"
24 #include "common/bloom_filter.hpp"
25 #include "common/hobject.h"
28 * generic container for a HitSet
30 * Encapsulate a HitSetImpl of any type. Expose a generic interface
31 * to users and wrap the encoded object with a type so that it can be
32 * safely decoded later.
39 TYPE_EXPLICIT_HASH
= 1,
40 TYPE_EXPLICIT_OBJECT
= 2,
44 static std::string_view
get_type_name(impl_type_t t
) {
46 case TYPE_NONE
: return "none";
47 case TYPE_EXPLICIT_HASH
: return "explicit_hash";
48 case TYPE_EXPLICIT_OBJECT
: return "explicit_object";
49 case TYPE_BLOOM
: return "bloom";
50 default: return "???";
53 std::string_view
get_type_name() const {
55 return get_type_name(impl
->get_type());
56 return get_type_name(TYPE_NONE
);
59 /// abstract interface for a HitSet implementation
62 virtual impl_type_t
get_type() const = 0;
63 virtual bool is_full() const = 0;
64 virtual void insert(const hobject_t
& o
) = 0;
65 virtual bool contains(const hobject_t
& o
) const = 0;
66 virtual unsigned insert_count() const = 0;
67 virtual unsigned approx_unique_insert_count() const = 0;
68 virtual void encode(ceph::buffer::list
&bl
) const = 0;
69 virtual void decode(ceph::buffer::list::const_iterator
& p
) = 0;
70 virtual void dump(ceph::Formatter
*f
) const = 0;
71 virtual Impl
* clone() const = 0;
72 virtual void seal() {}
76 boost::scoped_ptr
<Impl
> impl
;
80 /// create an Impl* of the given type
81 bool create_impl(impl_type_t t
);
86 virtual impl_type_t
get_type() const = 0;
87 virtual HitSet::Impl
*get_new_impl() const = 0;
88 virtual void encode(ceph::buffer::list
&bl
) const {}
89 virtual void decode(ceph::buffer::list::const_iterator
& p
) {}
90 virtual void dump(ceph::Formatter
*f
) const {}
91 virtual void dump_stream(std::ostream
& o
) const {}
96 explicit Params(Impl
*i
) : impl(i
) {}
99 boost::scoped_ptr
<Params::Impl
> impl
;
101 impl_type_t
get_type() const {
103 return impl
->get_type();
107 Params(const Params
& o
) noexcept
;
108 const Params
& operator=(const Params
& o
);
110 void encode(ceph::buffer::list
&bl
) const;
111 void decode(ceph::buffer::list::const_iterator
& bl
);
112 void dump(ceph::Formatter
*f
) const;
113 static void generate_test_instances(std::list
<HitSet::Params
*>& o
);
115 friend std::ostream
& operator<<(std::ostream
& out
, const HitSet::Params
& p
);
118 HitSet() : impl(NULL
), sealed(false) {}
119 explicit HitSet(Impl
*i
) : impl(i
), sealed(false) {}
120 explicit HitSet(const HitSet::Params
& params
);
122 HitSet(const HitSet
& o
) {
125 impl
.reset(o
.impl
->clone());
129 const HitSet
& operator=(const HitSet
& o
) {
132 impl
.reset(o
.impl
->clone());
139 bool is_full() const {
140 return impl
->is_full();
142 /// insert a hash into the set
143 void insert(const hobject_t
& o
) {
146 /// query whether a hash is in the set
147 bool contains(const hobject_t
& o
) const {
148 return impl
->contains(o
);
151 unsigned insert_count() const {
152 return impl
->insert_count();
154 unsigned approx_unique_insert_count() const {
155 return impl
->approx_unique_insert_count();
158 ceph_assert(!sealed
);
163 void encode(ceph::buffer::list
&bl
) const;
164 void decode(ceph::buffer::list::const_iterator
& bl
);
165 void dump(ceph::Formatter
*f
) const;
166 static void generate_test_instances(std::list
<HitSet
*>& o
);
169 void reset_to_type(impl_type_t type
);
171 WRITE_CLASS_ENCODER(HitSet
)
172 WRITE_CLASS_ENCODER(HitSet::Params
)
174 typedef boost::shared_ptr
<HitSet
> HitSetRef
;
176 std::ostream
& operator<<(std::ostream
& out
, const HitSet::Params
& p
);
179 * explicitly enumerate hash hits in the set
181 class ExplicitHashHitSet
: public HitSet::Impl
{
183 ceph::unordered_set
<uint32_t> hits
;
185 class Params
: public HitSet::Params::Impl
{
187 HitSet::impl_type_t
get_type() const override
{
188 return HitSet::TYPE_EXPLICIT_HASH
;
190 HitSet::Impl
*get_new_impl() const override
{
191 return new ExplicitHashHitSet
;
193 static void generate_test_instances(std::list
<Params
*>& o
) {
194 o
.push_back(new Params
);
198 ExplicitHashHitSet() : count(0) {}
199 explicit ExplicitHashHitSet(const ExplicitHashHitSet::Params
*p
) : count(0) {}
200 ExplicitHashHitSet(const ExplicitHashHitSet
&o
) : count(o
.count
),
203 HitSet::Impl
*clone() const override
{
204 return new ExplicitHashHitSet(*this);
207 HitSet::impl_type_t
get_type() const override
{
208 return HitSet::TYPE_EXPLICIT_HASH
;
210 bool is_full() const override
{
213 void insert(const hobject_t
& o
) override
{
214 hits
.insert(o
.get_hash());
217 bool contains(const hobject_t
& o
) const override
{
218 return hits
.count(o
.get_hash());
220 unsigned insert_count() const override
{
223 unsigned approx_unique_insert_count() const override
{
226 void encode(ceph::buffer::list
&bl
) const override
{
227 ENCODE_START(1, 1, bl
);
232 void decode(ceph::buffer::list::const_iterator
&bl
) override
{
238 void dump(ceph::Formatter
*f
) const override
;
239 static void generate_test_instances(std::list
<ExplicitHashHitSet
*>& o
) {
240 o
.push_back(new ExplicitHashHitSet
);
241 o
.push_back(new ExplicitHashHitSet
);
242 o
.back()->insert(hobject_t());
243 o
.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP
, 123, 1, ""));
244 o
.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP
, 456, 1, ""));
247 WRITE_CLASS_ENCODER(ExplicitHashHitSet
)
250 * explicitly enumerate objects in the set
252 class ExplicitObjectHitSet
: public HitSet::Impl
{
254 ceph::unordered_set
<hobject_t
> hits
;
256 class Params
: public HitSet::Params::Impl
{
258 HitSet::impl_type_t
get_type() const override
{
259 return HitSet::TYPE_EXPLICIT_OBJECT
;
261 HitSet::Impl
*get_new_impl() const override
{
262 return new ExplicitObjectHitSet
;
264 static void generate_test_instances(std::list
<Params
*>& o
) {
265 o
.push_back(new Params
);
269 ExplicitObjectHitSet() : count(0) {}
270 explicit ExplicitObjectHitSet(const ExplicitObjectHitSet::Params
*p
) : count(0) {}
271 ExplicitObjectHitSet(const ExplicitObjectHitSet
&o
) : count(o
.count
),
274 HitSet::Impl
*clone() const override
{
275 return new ExplicitObjectHitSet(*this);
278 HitSet::impl_type_t
get_type() const override
{
279 return HitSet::TYPE_EXPLICIT_OBJECT
;
281 bool is_full() const override
{
284 void insert(const hobject_t
& o
) override
{
288 bool contains(const hobject_t
& o
) const override
{
289 return hits
.count(o
);
291 unsigned insert_count() const override
{
294 unsigned approx_unique_insert_count() const override
{
297 void encode(ceph::buffer::list
&bl
) const override
{
298 ENCODE_START(1, 1, bl
);
303 void decode(ceph::buffer::list::const_iterator
& bl
) override
{
309 void dump(ceph::Formatter
*f
) const override
;
310 static void generate_test_instances(std::list
<ExplicitObjectHitSet
*>& o
) {
311 o
.push_back(new ExplicitObjectHitSet
);
312 o
.push_back(new ExplicitObjectHitSet
);
313 o
.back()->insert(hobject_t());
314 o
.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP
, 123, 1, ""));
315 o
.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP
, 456, 1, ""));
318 WRITE_CLASS_ENCODER(ExplicitObjectHitSet
)
321 * use a bloom_filter to track hits to the set
323 class BloomHitSet
: public HitSet::Impl
{
324 compressible_bloom_filter bloom
;
327 HitSet::impl_type_t
get_type() const override
{
328 return HitSet::TYPE_BLOOM
;
331 class Params
: public HitSet::Params::Impl
{
333 HitSet::impl_type_t
get_type() const override
{
334 return HitSet::TYPE_BLOOM
;
336 HitSet::Impl
*get_new_impl() const override
{
337 return new BloomHitSet
;
340 uint32_t fpp_micro
; ///< false positive probability / 1M
341 uint64_t target_size
; ///< number of unique insertions we expect to this HitSet
342 uint64_t seed
; ///< seed to use when initializing the bloom filter
345 : fpp_micro(0), target_size(0), seed(0) {}
346 Params(double fpp
, uint64_t t
, uint64_t s
)
347 : fpp_micro(fpp
* 1000000.0), target_size(t
), seed(s
) {}
348 Params(const Params
&o
)
349 : fpp_micro(o
.fpp_micro
),
350 target_size(o
.target_size
),
352 ~Params() override
{}
354 double get_fpp() const {
355 return (double)fpp_micro
/ 1000000.0;
357 void set_fpp(double f
) {
358 fpp_micro
= (unsigned)(llrintl(f
* 1000000.0));
361 void encode(ceph::buffer::list
& bl
) const override
{
362 ENCODE_START(1, 1, bl
);
363 encode(fpp_micro
, bl
);
364 encode(target_size
, bl
);
368 void decode(ceph::buffer::list::const_iterator
& bl
) override
{
370 decode(fpp_micro
, bl
);
371 decode(target_size
, bl
);
375 void dump(ceph::Formatter
*f
) const override
;
376 void dump_stream(std::ostream
& o
) const override
{
377 o
<< "false_positive_probability: "
378 << get_fpp() << ", target_size: " << target_size
379 << ", seed: " << seed
;
381 static void generate_test_instances(std::list
<Params
*>& o
) {
382 o
.push_back(new Params
);
383 o
.push_back(new Params
);
384 (*o
.rbegin())->fpp_micro
= 123456;
385 (*o
.rbegin())->target_size
= 300;
386 (*o
.rbegin())->seed
= 99;
391 BloomHitSet(unsigned inserts
, double fpp
, int seed
)
392 : bloom(inserts
, fpp
, seed
)
394 explicit BloomHitSet(const BloomHitSet::Params
*p
) : bloom(p
->target_size
,
399 BloomHitSet(const BloomHitSet
&o
) {
401 ceph::buffer::list bl
;
403 auto bli
= std::cbegin(bl
);
407 HitSet::Impl
*clone() const override
{
408 return new BloomHitSet(*this);
411 bool is_full() const override
{
412 return bloom
.is_full();
415 void insert(const hobject_t
& o
) override
{
416 bloom
.insert(o
.get_hash());
418 bool contains(const hobject_t
& o
) const override
{
419 return bloom
.contains(o
.get_hash());
421 unsigned insert_count() const override
{
422 return bloom
.element_count();
424 unsigned approx_unique_insert_count() const override
{
425 return bloom
.approx_unique_element_count();
427 void seal() override
{
428 // aim for a density of .5 (50% of bit set)
429 double pc
= bloom
.density() * 2.0;
434 void encode(ceph::buffer::list
&bl
) const override
{
435 ENCODE_START(1, 1, bl
);
439 void decode(ceph::buffer::list::const_iterator
& bl
) override
{
444 void dump(ceph::Formatter
*f
) const override
;
445 static void generate_test_instances(std::list
<BloomHitSet
*>& o
) {
446 o
.push_back(new BloomHitSet
);
447 o
.push_back(new BloomHitSet(10, .1, 1));
448 o
.back()->insert(hobject_t());
449 o
.back()->insert(hobject_t("asdf", "", CEPH_NOSNAP
, 123, 1, ""));
450 o
.back()->insert(hobject_t("qwer", "", CEPH_NOSNAP
, 456, 1, ""));
453 WRITE_CLASS_ENCODER(BloomHitSet
)